This is a R Markdown file to accompany the mansucript: “Rumen microbial composition in Holstein and Jersey cows is different under same dietary condition and is not affected by sampling method” by Paz et al. 2016 in AEM. It was written in R markdown and converted to html using the R knitr package. This enables us to embed the results of our analyses directly into the text to allow for a reproducible data analysis pipeline. A github repository is available.
The analysis from Paz et al., 2016 can be recreated by using the associated R Markdown file. This is setup to be ran on Mac OS X and was initially performed with 8 GB RAM. No root access is neeeded. Analyses should also work in a linux environment as well if the linux versions of USEARCH and anaconda package manager are used.
Due to licensing constraints, USEARCH could not be included in the setup. To obtain a download link, go to the USEARCH download page and select version USEARCH v8.1.1756 for Mac OSX. A link (expires after 30 days) will be sent to the provided email. Use the link as an argument for shell script below.
Simply download the bash script from the github repository and run it (provide the link to download your licensed USEARCH version as an argument for setup.sh):
Anaconda is downloaded and prompts you during installataion of the packages above. The prompts are as follows:
To convert the R markdown to html use the command: render(“dairy_breeds.Rmd”). To start a R session and run the workflow, use these commands from within the direcotry you initiated installation:
The following packages and knitR settings were used to compile this Markdown:
install.packages("ggplot2", repos='http://cran.us.r-project.org')
## also installing the dependencies 'colorspace', 'dichromat', 'munsell', 'labeling', 'gtable', 'plyr', 'reshape2', 'scales'
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("matrixStats", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("plyr", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("tidyr", repos='http://cran.us.r-project.org')
## also installing the dependencies 'assertthat', 'R6', 'DBI', 'BH', 'dplyr', 'lazyeval'
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("biom", repos='http://cran.us.r-project.org')
## also installing the dependency 'RJSONIO'
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("gplots", repos='http://cran.us.r-project.org')
## also installing the dependencies 'gtools', 'gdata'
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("RColorBrewer", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("vegan", repos='http://cran.us.r-project.org')
## also installing the dependency 'permute'
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("mvtnorm", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("modeltools", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("coin", repos='http://cran.us.r-project.org')
## also installing the dependencies 'zoo', 'TH.data', 'sandwich', 'multcomp'
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/RtmpdXjO6T/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-apple-darwin11.4.2 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.12.3 rmarkdown_0.9.2 BiocInstaller_1.20.1
##
## loaded via a namespace (and not attached):
## [1] magrittr_1.5 formatR_1.2.1 htmltools_0.3 tools_3.2.2 stringi_1.0-1
## [6] stringr_1.0.0 digest_0.6.9 evaluate_0.8
perm = 1e4
opts_chunk$set("tidy" = TRUE)
opts_chunk$set("echo" = TRUE)
opts_chunk$set("eval" = TRUE)
opts_chunk$set("warning" = FALSE)
opts_chunk$set("cache" = TRUE)
Download the file containing the dry matter intake (kg/d) and milk yield (kg/d) across cows.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/production_measures.txt
## --2016-02-12 04:52:46-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/production_measures.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 259 [text/plain]
## Saving to: 'production_measures.txt'
##
## 0K 100% 20.6M=0s
##
## 2016-02-12 04:52:47 (20.6 MB/s) - 'production_measures.txt' saved [259/259]
Generate bar chart of lactation measures for Holstein and Jersey breeds
library(tidyr)
library(plyr)
library(ggplot2)
production_wide <- read.table("production_measures.txt", header = T, sep = "\t")
production_long <- gather(production_wide, Measure, kg_d, DMI:MY)
production_summary <- ddply(production_long, c("Breed", "Measure"), summarise,
N = length(kg_d), mean = mean(kg_d), sd = sd(kg_d), se = sd/sqrt(N))
# Error bars represent standard error of the mean
production_plot <- ggplot(production_summary, aes(x = Measure, y = mean, fill = Breed)) +
ylab("kg/d") + scale_fill_brewer("Paired") + geom_bar(position = position_dodge(),
stat = "identity", color = "black", size = 0.5, width = 0.9) + geom_errorbar(aes(ymin = mean -
se, ymax = mean + se), size = 0.5, width = 0.3, position = position_dodge(0.9)) +
theme(legend.title = element_blank(), axis.text = element_text(color = "black",
size = 12, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(color = "black",
size = 10, face = "bold")) + scale_x_discrete(breaks = c("DMI", "MY"),
labels = c("Dry Matter Intake", "Millk Yield"))
png("FigS1.png", units = "in", height = 5, width = 6, res = 300)
production_plot
dev.off()
## quartz_off_screen
## 2
pdf("FigS1.pdf", height = 6, width = 6)
production_plot
dev.off()
## quartz_off_screen
## 2
library(car)
cow_measures <- read.table("production_measures.txt", header = T, sep = "\t")
# Levene test Ho: Variances of breeds dry matter intake are equal
leveneTest(cow_measures$DMI ~ cow_measures$Breed)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.5255 0.492
## 7
# T test Ho: mean DMI Holstein = mean DMI Jersey Two-sided t test default
# options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(cow_measures$DMI ~ cow_measures$Breed, var.eq = T)
##
## Two Sample t-test
##
## data: cow_measures$DMI by cow_measures$Breed
## t = 6.5245, df = 7, p-value = 0.0003265
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.929666 6.260334
## sample estimates:
## mean in group Holstein mean in group Jersey
## 23.820 19.225
# Levene test Ho: Variances of breeds milk yield are equal
leveneTest(cow_measures$MY ~ cow_measures$Breed)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.962 0.3594
## 7
# T test Ho: mean MY Holstein = mean MY Jersey Two-sided t test default
# options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(cow_measures$MY ~ cow_measures$Breed, var.eq = T)
##
## Two Sample t-test
##
## data: cow_measures$MY by cow_measures$Breed
## t = 3.3752, df = 7, p-value = 0.01184
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.420873 25.109127
## sample estimates:
## mean in group Holstein mean in group Jersey
## 38.540 23.775
Sequences were downloaded from the Torrent server in a fastq file (seqs.fastq) which was used to generate a fasta file (seqs.fna). Initial quality control steps were performed using the Torrent Suite Software (refer to manuscript)
convert_fastaqual_fastq.py -c fastq_to_fastaqual –f seqs.fastq –o fastaqual
Because of file size limit in GitHub, the previously described seqs.fastq and seqs.fna files were not uploaded.
Demultiplex the sequences in the library using the mapping file.
split_libraries.py -f seqs.fna -b variable_length -l 0 -L 1000 -x -M 1 -m mappingfile.txt -o seqs_demultiplexed
The seqs_demultiplexed.fna generated is provided. Remove primer and subsequent sequence.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/seqs_demultiplexed.fna.tar.gz
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/mappingfile.txt
tar -zxvf seqs_demultiplexed.fna.tar.gz
truncate_reverse_primer.py -f seqs_demultiplexed.fna -m mappingfile.txt -z truncate_only -M 2 -o seqs_revprimer_truncated
## --2016-02-12 04:52:52-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/seqs_demultiplexed.fna.tar.gz
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 13113749 (13M) [application/octet-stream]
## Saving to: 'seqs_demultiplexed.fna.tar.gz'
##
## 0K .......... .......... .......... .......... .......... 0% 1.07M 12s
## 50K .......... .......... .......... .......... .......... 0% 1.62M 10s
## 100K .......... .......... .......... .......... .......... 1% 1.69M 9s
## 150K .......... .......... .......... .......... .......... 1% 1.97M 8s
## 200K .......... .......... .......... .......... .......... 1% 1.84M 8s
## 250K .......... .......... .......... .......... .......... 2% 2.03M 8s
## 300K .......... .......... .......... .......... .......... 2% 1.96M 7s
## 350K .......... .......... .......... .......... .......... 3% 1.59M 7s
## 400K .......... .......... .......... .......... .......... 3% 2.00M 7s
## 450K .......... .......... .......... .......... .......... 3% 2.27M 7s
## 500K .......... .......... .......... .......... .......... 4% 1.83M 7s
## 550K .......... .......... .......... .......... .......... 4% 1.95M 7s
## 600K .......... .......... .......... .......... .......... 5% 2.24M 7s
## 650K .......... .......... .......... .......... .......... 5% 1.96M 7s
## 700K .......... .......... .......... .......... .......... 5% 2.59M 6s
## 750K .......... .......... .......... .......... .......... 6% 1.53M 6s
## 800K .......... .......... .......... .......... .......... 6% 2.39M 6s
## 850K .......... .......... .......... .......... .......... 7% 2.15M 6s
## 900K .......... .......... .......... .......... .......... 7% 2.24M 6s
## 950K .......... .......... .......... .......... .......... 7% 1.74M 6s
## 1000K .......... .......... .......... .......... .......... 8% 2.29M 6s
## 1050K .......... .......... .......... .......... .......... 8% 2.15M 6s
## 1100K .......... .......... .......... .......... .......... 8% 2.17M 6s
## 1150K .......... .......... .......... .......... .......... 9% 1.66M 6s
## 1200K .......... .......... .......... .......... .......... 9% 2.16M 6s
## 1250K .......... .......... .......... .......... .......... 10% 2.20M 6s
## 1300K .......... .......... .......... .......... .......... 10% 1.90M 6s
## 1350K .......... .......... .......... .......... .......... 10% 2.36M 6s
## 1400K .......... .......... .......... .......... .......... 11% 2.28M 6s
## 1450K .......... .......... .......... .......... .......... 11% 2.20M 6s
## 1500K .......... .......... .......... .......... .......... 12% 2.10M 6s
## 1550K .......... .......... .......... .......... .......... 12% 1.64M 6s
## 1600K .......... .......... .......... .......... .......... 12% 2.20M 6s
## 1650K .......... .......... .......... .......... .......... 13% 2.19M 6s
## 1700K .......... .......... .......... .......... .......... 13% 2.26M 6s
## 1750K .......... .......... .......... .......... .......... 14% 2.12M 5s
## 1800K .......... .......... .......... .......... .......... 14% 2.19M 5s
## 1850K .......... .......... .......... .......... .......... 14% 2.00M 5s
## 1900K .......... .......... .......... .......... .......... 15% 2.39M 5s
## 1950K .......... .......... .......... .......... .......... 15% 1.57M 5s
## 2000K .......... .......... .......... .......... .......... 16% 2.23M 5s
## 2050K .......... .......... .......... .......... .......... 16% 2.25M 5s
## 2100K .......... .......... .......... .......... .......... 16% 2.17M 5s
## 2150K .......... .......... .......... .......... .......... 17% 2.17M 5s
## 2200K .......... .......... .......... .......... .......... 17% 2.19M 5s
## 2250K .......... .......... .......... .......... .......... 17% 2.16M 5s
## 2300K .......... .......... .......... .......... .......... 18% 1.93M 5s
## 2350K .......... .......... .......... .......... .......... 18% 1.83M 5s
## 2400K .......... .......... .......... .......... .......... 19% 2.16M 5s
## 2450K .......... .......... .......... .......... .......... 19% 2.18M 5s
## 2500K .......... .......... .......... .......... .......... 19% 1.72M 5s
## 2550K .......... .......... .......... .......... .......... 20% 2.91M 5s
## 2600K .......... .......... .......... .......... .......... 20% 2.20M 5s
## 2650K .......... .......... .......... .......... .......... 21% 2.17M 5s
## 2700K .......... .......... .......... .......... .......... 21% 2.17M 5s
## 2750K .......... .......... .......... .......... .......... 21% 1.63M 5s
## 2800K .......... .......... .......... .......... .......... 22% 2.19M 5s
## 2850K .......... .......... .......... .......... .......... 22% 2.23M 5s
## 2900K .......... .......... .......... .......... .......... 23% 2.13M 5s
## 2950K .......... .......... .......... .......... .......... 23% 1.98M 5s
## 3000K .......... .......... .......... .......... .......... 23% 2.40M 5s
## 3050K .......... .......... .......... .......... .......... 24% 1.70M 5s
## 3100K .......... .......... .......... .......... .......... 24% 2.93M 5s
## 3150K .......... .......... .......... .......... .......... 24% 1.53M 5s
## 3200K .......... .......... .......... .......... .......... 25% 2.44M 5s
## 3250K .......... .......... .......... .......... .......... 25% 2.09M 5s
## 3300K .......... .......... .......... .......... .......... 26% 2.24M 5s
## 3350K .......... .......... .......... .......... .......... 26% 2.14M 5s
## 3400K .......... .......... .......... .......... .......... 26% 2.18M 5s
## 3450K .......... .......... .......... .......... .......... 27% 2.15M 4s
## 3500K .......... .......... .......... .......... .......... 27% 2.16M 4s
## 3550K .......... .......... .......... .......... .......... 28% 1.64M 4s
## 3600K .......... .......... .......... .......... .......... 28% 2.03M 4s
## 3650K .......... .......... .......... .......... .......... 28% 2.37M 4s
## 3700K .......... .......... .......... .......... .......... 29% 1.90M 4s
## 3750K .......... .......... .......... .......... .......... 29% 2.45M 4s
## 3800K .......... .......... .......... .......... .......... 30% 2.16M 4s
## 3850K .......... .......... .......... .......... .......... 30% 2.25M 4s
## 3900K .......... .......... .......... .......... .......... 30% 2.16M 4s
## 3950K .......... .......... .......... .......... .......... 31% 1.63M 4s
## 4000K .......... .......... .......... .......... .......... 31% 2.21M 4s
## 4050K .......... .......... .......... .......... .......... 32% 2.12M 4s
## 4100K .......... .......... .......... .......... .......... 32% 2.25M 4s
## 4150K .......... .......... .......... .......... .......... 32% 2.14M 4s
## 4200K .......... .......... .......... .......... .......... 33% 2.19M 4s
## 4250K .......... .......... .......... .......... .......... 33% 2.16M 4s
## 4300K .......... .......... .......... .......... .......... 33% 2.20M 4s
## 4350K .......... .......... .......... .......... .......... 34% 1.38M 4s
## 4400K .......... .......... .......... .......... .......... 34% 2.85M 4s
## 4450K .......... .......... .......... .......... .......... 35% 2.13M 4s
## 4500K .......... .......... .......... .......... .......... 35% 2.16M 4s
## 4550K .......... .......... .......... .......... .......... 35% 2.23M 4s
## 4600K .......... .......... .......... .......... .......... 36% 2.13M 4s
## 4650K .......... .......... .......... .......... .......... 36% 2.23M 4s
## 4700K .......... .......... .......... .......... .......... 37% 2.06M 4s
## 4750K .......... .......... .......... .......... .......... 37% 1.59M 4s
## 4800K .......... .......... .......... .......... .......... 37% 2.06M 4s
## 4850K .......... .......... .......... .......... .......... 38% 2.50M 4s
## 4900K .......... .......... .......... .......... .......... 38% 2.23M 4s
## 4950K .......... .......... .......... .......... .......... 39% 2.12M 4s
## 5000K .......... .......... .......... .......... .......... 39% 2.22M 4s
## 5050K .......... .......... .......... .......... .......... 39% 2.14M 4s
## 5100K .......... .......... .......... .......... .......... 40% 2.20M 4s
## 5150K .......... .......... .......... .......... .......... 40% 1.64M 4s
## 5200K .......... .......... .......... .......... .......... 40% 2.22M 4s
## 5250K .......... .......... .......... .......... .......... 41% 2.01M 4s
## 5300K .......... .......... .......... .......... .......... 41% 2.29M 4s
## 5350K .......... .......... .......... .......... .......... 42% 2.20M 4s
## 5400K .......... .......... .......... .......... .......... 42% 2.16M 4s
## 5450K .......... .......... .......... .......... .......... 42% 2.14M 3s
## 5500K .......... .......... .......... .......... .......... 43% 2.16M 3s
## 5550K .......... .......... .......... .......... .......... 43% 1.65M 3s
## 5600K .......... .......... .......... .......... .......... 44% 2.23M 3s
## 5650K .......... .......... .......... .......... .......... 44% 2.15M 3s
## 5700K .......... .......... .......... .......... .......... 44% 2.18M 3s
## 5750K .......... .......... .......... .......... .......... 45% 2.17M 3s
## 5800K .......... .......... .......... .......... .......... 45% 2.01M 3s
## 5850K .......... .......... .......... .......... .......... 46% 2.41M 3s
## 5900K .......... .......... .......... .......... .......... 46% 1.82M 3s
## 5950K .......... .......... .......... .......... .......... 46% 1.91M 3s
## 6000K .......... .......... .......... .......... .......... 47% 1.94M 3s
## 6050K .......... .......... .......... .......... .......... 47% 2.50M 3s
## 6100K .......... .......... .......... .......... .......... 48% 2.16M 3s
## 6150K .......... .......... .......... .......... .......... 48% 2.20M 3s
## 6200K .......... .......... .......... .......... .......... 48% 2.10M 3s
## 6250K .......... .......... .......... .......... .......... 49% 2.24M 3s
## 6300K .......... .......... .......... .......... .......... 49% 1.94M 3s
## 6350K .......... .......... .......... .......... .......... 49% 1.79M 3s
## 6400K .......... .......... .......... .......... .......... 50% 2.17M 3s
## 6450K .......... .......... .......... .......... .......... 50% 1.97M 3s
## 6500K .......... .......... .......... .......... .......... 51% 2.43M 3s
## 6550K .......... .......... .......... .......... .......... 51% 1.88M 3s
## 6600K .......... .......... .......... .......... .......... 51% 2.63M 3s
## 6650K .......... .......... .......... .......... .......... 52% 2.10M 3s
## 6700K .......... .......... .......... .......... .......... 52% 2.19M 3s
## 6750K .......... .......... .......... .......... .......... 53% 1.66M 3s
## 6800K .......... .......... .......... .......... .......... 53% 2.15M 3s
## 6850K .......... .......... .......... .......... .......... 53% 2.24M 3s
## 6900K .......... .......... .......... .......... .......... 54% 2.14M 3s
## 6950K .......... .......... .......... .......... .......... 54% 2.18M 3s
## 7000K .......... .......... .......... .......... .......... 55% 2.06M 3s
## 7050K .......... .......... .......... .......... .......... 55% 2.30M 3s
## 7100K .......... .......... .......... .......... .......... 55% 2.17M 3s
## 7150K .......... .......... .......... .......... .......... 56% 1.64M 3s
## 7200K .......... .......... .......... .......... .......... 56% 1.83M 3s
## 7250K .......... .......... .......... .......... .......... 57% 2.48M 3s
## 7300K .......... .......... .......... .......... .......... 57% 2.29M 3s
## 7350K .......... .......... .......... .......... .......... 57% 2.21M 3s
## 7400K .......... .......... .......... .......... .......... 58% 2.22M 3s
## 7450K .......... .......... .......... .......... .......... 58% 2.08M 3s
## 7500K .......... .......... .......... .......... .......... 58% 2.23M 2s
## 7550K .......... .......... .......... .......... .......... 59% 1.64M 2s
## 7600K .......... .......... .......... .......... .......... 59% 1.98M 2s
## 7650K .......... .......... .......... .......... .......... 60% 2.29M 2s
## 7700K .......... .......... .......... .......... .......... 60% 2.22M 2s
## 7750K .......... .......... .......... .......... .......... 60% 2.27M 2s
## 7800K .......... .......... .......... .......... .......... 61% 2.14M 2s
## 7850K .......... .......... .......... .......... .......... 61% 2.23M 2s
## 7900K .......... .......... .......... .......... .......... 62% 2.12M 2s
## 7950K .......... .......... .......... .......... .......... 62% 1.60M 2s
## 8000K .......... .......... .......... .......... .......... 62% 2.27M 2s
## 8050K .......... .......... .......... .......... .......... 63% 2.16M 2s
## 8100K .......... .......... .......... .......... .......... 63% 2.23M 2s
## 8150K .......... .......... .......... .......... .......... 64% 2.07M 2s
## 8200K .......... .......... .......... .......... .......... 64% 2.12M 2s
## 8250K .......... .......... .......... .......... .......... 64% 2.35M 2s
## 8300K .......... .......... .......... .......... .......... 65% 2.22M 2s
## 8350K .......... .......... .......... .......... .......... 65% 1.59M 2s
## 8400K .......... .......... .......... .......... .......... 65% 2.10M 2s
## 8450K .......... .......... .......... .......... .......... 66% 2.28M 2s
## 8500K .......... .......... .......... .......... .......... 66% 2.18M 2s
## 8550K .......... .......... .......... .......... .......... 67% 2.01M 2s
## 8600K .......... .......... .......... .......... .......... 67% 2.39M 2s
## 8650K .......... .......... .......... .......... .......... 67% 2.06M 2s
## 8700K .......... .......... .......... .......... .......... 68% 2.24M 2s
## 8750K .......... .......... .......... .......... .......... 68% 1.67M 2s
## 8800K .......... .......... .......... .......... .......... 69% 2.23M 2s
## 8850K .......... .......... .......... .......... .......... 69% 1.71M 2s
## 8900K .......... .......... .......... .......... .......... 69% 2.70M 2s
## 8950K .......... .......... .......... .......... .......... 70% 2.20M 2s
## 9000K .......... .......... .......... .......... .......... 70% 2.26M 2s
## 9050K .......... .......... .......... .......... .......... 71% 2.18M 2s
## 9100K .......... .......... .......... .......... .......... 71% 2.16M 2s
## 9150K .......... .......... .......... .......... .......... 71% 1.64M 2s
## 9200K .......... .......... .......... .......... .......... 72% 2.20M 2s
## 9250K .......... .......... .......... .......... .......... 72% 2.10M 2s
## 9300K .......... .......... .......... .......... .......... 73% 2.00M 2s
## 9350K .......... .......... .......... .......... .......... 73% 2.47M 2s
## 9400K .......... .......... .......... .......... .......... 73% 1.90M 2s
## 9450K .......... .......... .......... .......... .......... 74% 2.39M 2s
## 9500K .......... .......... .......... .......... .......... 74% 2.26M 2s
## 9550K .......... .......... .......... .......... .......... 74% 1.64M 2s
## 9600K .......... .......... .......... .......... .......... 75% 2.19M 1s
## 9650K .......... .......... .......... .......... .......... 75% 2.18M 1s
## 9700K .......... .......... .......... .......... .......... 76% 2.22M 1s
## 9750K .......... .......... .......... .......... .......... 76% 2.13M 1s
## 9800K .......... .......... .......... .......... .......... 76% 1.97M 1s
## 9850K .......... .......... .......... .......... .......... 77% 2.36M 1s
## 9900K .......... .......... .......... .......... .......... 77% 2.15M 1s
## 9950K .......... .......... .......... .......... .......... 78% 1.55M 1s
## 10000K .......... .......... .......... .......... .......... 78% 2.40M 1s
## 10050K .......... .......... .......... .......... .......... 78% 1.83M 1s
## 10100K .......... .......... .......... .......... .......... 79% 2.70M 1s
## 10150K .......... .......... .......... .......... .......... 79% 2.20M 1s
## 10200K .......... .......... .......... .......... .......... 80% 2.17M 1s
## 10250K .......... .......... .......... .......... .......... 80% 2.21M 1s
## 10300K .......... .......... .......... .......... .......... 80% 2.08M 1s
## 10350K .......... .......... .......... .......... .......... 81% 1.68M 1s
## 10400K .......... .......... .......... .......... .......... 81% 2.21M 1s
## 10450K .......... .......... .......... .......... .......... 81% 2.12M 1s
## 10500K .......... .......... .......... .......... .......... 82% 2.16M 1s
## 10550K .......... .......... .......... .......... .......... 82% 2.20M 1s
## 10600K .......... .......... .......... .......... .......... 83% 2.21M 1s
## 10650K .......... .......... .......... .......... .......... 83% 2.14M 1s
## 10700K .......... .......... .......... .......... .......... 83% 2.19M 1s
## 10750K .......... .......... .......... .......... .......... 84% 1.66M 1s
## 10800K .......... .......... .......... .......... .......... 84% 2.04M 1s
## 10850K .......... .......... .......... .......... .......... 85% 2.30M 1s
## 10900K .......... .......... .......... .......... .......... 85% 2.14M 1s
## 10950K .......... .......... .......... .......... .......... 85% 2.16M 1s
## 11000K .......... .......... .......... .......... .......... 86% 2.20M 1s
## 11050K .......... .......... .......... .......... .......... 86% 1.97M 1s
## 11100K .......... .......... .......... .......... .......... 87% 2.41M 1s
## 11150K .......... .......... .......... .......... .......... 87% 1.58M 1s
## 11200K .......... .......... .......... .......... .......... 87% 2.29M 1s
## 11250K .......... .......... .......... .......... .......... 88% 2.21M 1s
## 11300K .......... .......... .......... .......... .......... 88% 2.19M 1s
## 11350K .......... .......... .......... .......... .......... 89% 2.13M 1s
## 11400K .......... .......... .......... .......... .......... 89% 2.25M 1s
## 11450K .......... .......... .......... .......... .......... 89% 2.16M 1s
## 11500K .......... .......... .......... .......... .......... 90% 2.03M 1s
## 11550K .......... .......... .......... .......... .......... 90% 1.74M 1s
## 11600K .......... .......... .......... .......... .......... 90% 2.17M 1s
## 11650K .......... .......... .......... .......... .......... 91% 2.18M 1s
## 11700K .......... .......... .......... .......... .......... 91% 2.01M 0s
## 11750K .......... .......... .......... .......... .......... 92% 2.42M 0s
## 11800K .......... .......... .......... .......... .......... 92% 2.16M 0s
## 11850K .......... .......... .......... .......... .......... 92% 2.12M 0s
## 11900K .......... .......... .......... .......... .......... 93% 2.24M 0s
## 11950K .......... .......... .......... .......... .......... 93% 1.63M 0s
## 12000K .......... .......... .......... .......... .......... 94% 2.13M 0s
## 12050K .......... .......... .......... .......... .......... 94% 2.24M 0s
## 12100K .......... .......... .......... .......... .......... 94% 2.13M 0s
## 12150K .......... .......... .......... .......... .......... 95% 2.02M 0s
## 12200K .......... .......... .......... .......... .......... 95% 2.34M 0s
## 12250K .......... .......... .......... .......... .......... 96% 1.69M 0s
## 12300K .......... .......... .......... .......... .......... 96% 3.01M 0s
## 12350K .......... .......... .......... .......... .......... 96% 1.51M 0s
## 12400K .......... .......... .......... .......... .......... 97% 2.52M 0s
## 12450K .......... .......... .......... .......... .......... 97% 2.16M 0s
## 12500K .......... .......... .......... .......... .......... 97% 2.16M 0s
## 12550K .......... .......... .......... .......... .......... 98% 2.10M 0s
## 12600K .......... .......... .......... .......... .......... 98% 2.20M 0s
## 12650K .......... .......... .......... .......... .......... 99% 2.05M 0s
## 12700K .......... .......... .......... .......... .......... 99% 2.21M 0s
## 12750K .......... .......... .......... .......... .......... 99% 1.99M 0s
## 12800K ...... 100% 128K=6.0s
##
## 2016-02-12 04:52:59 (2.07 MB/s) - 'seqs_demultiplexed.fna.tar.gz' saved [13113749/13113749]
##
## --2016-02-12 04:52:59-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/mappingfile.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1307 (1.3K) [text/plain]
## Saving to: 'mappingfile.txt'
##
## 0K . 100% 125M=0s
##
## 2016-02-12 04:52:59 (125 MB/s) - 'mappingfile.txt' saved [1307/1307]
##
## x seqs_demultiplexed.fna
Trim the sequences to a fixed length (130 basepairs for this study) using 2 custom perl scripts to improve OTU selection in UPARSE downstream.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/min_max_length.pl
chmod 775 min_max_length.pl
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/second_min_max_length.pl
chmod 775 second_min_max_length.pl
./min_max_length.pl -min=131 -max=131 -fasta=seqs_revprimer_truncated/seqs_demultiplexed_rev_primer_truncated.fna
./second_min_max_length.pl -min=130 -max=130 -fasta=seqs_trimmed.fasta
## --2016-02-12 05:07:56-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/min_max_length.pl
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1832 (1.8K) [text/plain]
## Saving to: 'min_max_length.pl'
##
## 0K . 100% 175M=0s
##
## 2016-02-12 05:07:57 (175 MB/s) - 'min_max_length.pl' saved [1832/1832]
##
## --2016-02-12 05:07:57-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/second_min_max_length.pl
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1906 (1.9K) [text/plain]
## Saving to: 'second_min_max_length.pl'
##
## 0K . 100% 165M=0s
##
## 2016-02-12 05:07:57 (165 MB/s) - 'second_min_max_length.pl' saved [1906/1906]
##
##
## Input read count: 458998
## Output read count: 397229
##
##
## Input read count: 397230
## Output read count: 397230
Reverse complement the sequences in mothur.
mothur "#reverse.seqs(fasta=seqs_finaltrimmed.fasta)"
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > reverse.seqs(fasta=seqs_finaltrimmed.fasta)
##
## Output File Names:
## seqs_finaltrimmed.rc.fasta
##
## mothur > quit()
Use a custom perl script to convert the fasta file from QIIME format to UPARSE format to generate the OTU table.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/qiime_to_usearch.pl
chmod 775 qiime_to_usearch.pl
./qiime_to_usearch.pl -fasta=seqs_finaltrimmed.rc.fasta -prefix=cow
mv format.fasta dairybreeds.format.fasta
Run the sequences through the UPARSE pipeline to pick OTUs.
svn export https://github.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/trunk/Scripts/usearch_python_scripts --non-interactive --trust-server-cert
chmod -R 775 usearch_python_scripts
chmod 775 usearch_python_scripts/uc2otutab.py
chmod 775 usearch_python_scripts/fasta_number.py
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gold.fasta.gz
gzip -d gold.fasta.gz
chmod 775 gold.fasta
mkdir usearch_results
usearch -derep_fulllength dairybreeds.format.fasta -sizeout -fastaout usearch_results/dairybreeds.derep.fasta
usearch -sortbysize usearch_results/dairybreeds.derep.fasta -minsize 2 -fastaout usearch_results/dairybreeds.derep.sort.fasta
usearch -cluster_otus usearch_results/dairybreeds.derep.sort.fasta -otus usearch_results/dairybreeds.otus1.fasta
usearch -uchime_ref usearch_results/dairybreeds.otus1.fasta -db gold.fasta -strand plus -nonchimeras usearch_results/dairybreeds.otus1.nonchimera.fasta
python usearch_python_scripts/fasta_number.py usearch_results/dairybreeds.otus1.nonchimera.fasta > usearch_results/dairybreeds.otus2.fasta
usearch -usearch_global dairybreeds.format.fasta -db usearch_results/dairybreeds.otus2.fasta -strand plus -id 0.97 -uc usearch_results/dairybreeds.otu_map.uc
python usearch_python_scripts/uc2otutab.py usearch_results/dairybreeds.otu_map.uc > usearch_results/dairybreeds.otu_table.txt
cp usearch_results/dairybreeds.otu_table.txt ./
## A usearch_python_scripts
## A usearch_python_scripts/die.py
## A usearch_python_scripts/die.pyc
## A usearch_python_scripts/faqual2fastq.py
## A usearch_python_scripts/faqual2fastq2.py
## A usearch_python_scripts/fasta.py
## A usearch_python_scripts/fasta.pyc
## A usearch_python_scripts/fasta_number.py
## A usearch_python_scripts/fastq.py
## A usearch_python_scripts/fastq_strip_barcode_relabel.py
## A usearch_python_scripts/fastq_strip_barcode_relabel2.py
## A usearch_python_scripts/primer.py
## A usearch_python_scripts/progress.py
## A usearch_python_scripts/progress.pyc
## A usearch_python_scripts/uc.py
## A usearch_python_scripts/uc.pyc
## A usearch_python_scripts/uc2otutab.py
## Exported revision 13.
## --2016-02-12 05:08:11-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gold.fasta.gz
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 3293704 (3.1M) [application/octet-stream]
## Saving to: 'gold.fasta.gz'
##
## 0K .......... .......... .......... .......... .......... 1% 239K 13s
## 50K .......... .......... .......... .......... .......... 3% 1008K 8s
## 100K .......... .......... .......... .......... .......... 4% 731K 7s
## 150K .......... .......... .......... .......... .......... 6% 358K 7s
## 200K .......... .......... .......... .......... .......... 7% 240K 8s
## 250K .......... .......... .......... .......... .......... 9% 1.21M 7s
## 300K .......... .......... .......... .......... .......... 10% 732K 6s
## 350K .......... .......... .......... .......... .......... 12% 283K 7s
## 400K .......... .......... .......... .......... .......... 13% 307K 7s
## 450K .......... .......... .......... .......... .......... 15% 1.51M 6s
## 500K .......... .......... .......... .......... .......... 17% 1.78M 6s
## 550K .......... .......... .......... .......... .......... 18% 1.37M 5s
## 600K .......... .......... .......... .......... .......... 20% 1.03M 5s
## 650K .......... .......... .......... .......... .......... 21% 536K 5s
## 700K .......... .......... .......... .......... .......... 23% 383K 5s
## 750K .......... .......... .......... .......... .......... 24% 357K 5s
## 800K .......... .......... .......... .......... .......... 26% 2.29M 5s
## 850K .......... .......... .......... .......... .......... 27% 1.28M 4s
## 900K .......... .......... .......... .......... .......... 29% 1.01M 4s
## 950K .......... .......... .......... .......... .......... 31% 742K 4s
## 1000K .......... .......... .......... .......... .......... 32% 295K 4s
## 1050K .......... .......... .......... .......... .......... 34% 367K 4s
## 1100K .......... .......... .......... .......... .......... 35% 2.24M 4s
## 1150K .......... .......... .......... .......... .......... 37% 1.42M 4s
## 1200K .......... .......... .......... .......... .......... 38% 1.02M 3s
## 1250K .......... .......... .......... .......... .......... 40% 919K 3s
## 1300K .......... .......... .......... .......... .......... 41% 346K 3s
## 1350K .......... .......... .......... .......... .......... 43% 302K 3s
## 1400K .......... .......... .......... .......... .......... 45% 1.27M 3s
## 1450K .......... .......... .......... .......... .......... 46% 2.16M 3s
## 1500K .......... .......... .......... .......... .......... 48% 2.19M 3s
## 1550K .......... .......... .......... .......... .......... 49% 1.58M 3s
## 1600K .......... .......... .......... .......... .......... 51% 995K 3s
## 1650K .......... .......... .......... .......... .......... 52% 1.13M 2s
## 1700K .......... .......... .......... .......... .......... 54% 569K 2s
## 1750K .......... .......... .......... .......... .......... 55% 597K 2s
## 1800K .......... .......... .......... .......... .......... 57% 299K 2s
## 1850K .......... .......... .......... .......... .......... 59% 2.23M 2s
## 1900K .......... .......... .......... .......... .......... 60% 2.14M 2s
## 1950K .......... .......... .......... .......... .......... 62% 1.60M 2s
## 2000K .......... .......... .......... .......... .......... 63% 1013K 2s
## 2050K .......... .......... .......... .......... .......... 65% 1.13M 2s
## 2100K .......... .......... .......... .......... .......... 66% 646K 2s
## 2150K .......... .......... .......... .......... .......... 68% 535K 2s
## 2200K .......... .......... .......... .......... .......... 69% 338K 2s
## 2250K .......... .......... .......... .......... .......... 71% 3.28M 1s
## 2300K .......... .......... .......... .......... .......... 73% 2.14M 1s
## 2350K .......... .......... .......... .......... .......... 74% 1.57M 1s
## 2400K .......... .......... .......... .......... .......... 76% 2.40M 1s
## 2450K .......... .......... .......... .......... .......... 77% 2.19M 1s
## 2500K .......... .......... .......... .......... .......... 79% 2.18M 1s
## 2550K .......... .......... .......... .......... .......... 80% 2.15M 1s
## 2600K .......... .......... .......... .......... .......... 82% 2.14M 1s
## 2650K .......... .......... .......... .......... .......... 83% 2.10M 1s
## 2700K .......... .......... .......... .......... .......... 85% 1.97M 1s
## 2750K .......... .......... .......... .......... .......... 87% 1.39M 1s
## 2800K .......... .......... .......... .......... .......... 88% 2.87M 1s
## 2850K .......... .......... .......... .......... .......... 90% 2.57M 0s
## 2900K .......... .......... .......... .......... .......... 91% 2.16M 0s
## 2950K .......... .......... .......... .......... .......... 93% 2.18M 0s
## 3000K .......... .......... .......... .......... .......... 94% 1.80M 0s
## 3050K .......... .......... .......... .......... .......... 96% 2.72M 0s
## 3100K .......... .......... .......... .......... .......... 97% 2.18M 0s
## 3150K .......... .......... .......... .......... .......... 99% 1.61M 0s
## 3200K .......... ...... 100% 75.7M=4.1s
##
## 2016-02-12 05:08:15 (793 KB/s) - 'gold.fasta.gz' saved [3293704/3293704]
##
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading dairybreeds.format.fasta
00:00 114Mb 100.0% Reading dairybreeds.format.fasta
## 00:01 104Mb 397231 seqs, 108008 uniques, 82894 singletons (76.7%)
## 00:01 104Mb Min size 1, median 1, max 4275, avg 3.68
## 00:01 97Mb 0.0% Writing usearch_results/dairybreeds.derep.fasta
00:02 97Mb 26.1% Writing usearch_results/dairybreeds.derep.fasta
00:03 97Mb 93.1% Writing usearch_results/dairybreeds.derep.fasta
00:03 97Mb 100.0% Writing usearch_results/dairybreeds.derep.fasta
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading usearch_results/dairybreeds.derep.fasta
00:00 40Mb 100.0% Reading usearch_results/dairybreeds.derep.fasta
## 00:00 22Mb Getting sizes
## 00:00 22Mb Sorting 25114 sequences
## 00:00 22Mb 0.0% Writing output
00:01 22Mb 63.8% Writing output
00:01 22Mb 100.0% Writing output
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 2.4Mb 0.1% 0 OTUs, 0 chimeras (0.0%)
00:01 10.0Mb 16.1% 1076 OTUs, 216 chimeras (5.4%)
00:02 11Mb 36.3% 1733 OTUs, 739 chimeras (8.1%)
00:03 11Mb 52.7% 2157 OTUs, 1365 chimeras (10.3%)
00:04 12Mb 67.1% 2580 OTUs, 2009 chimeras (11.9%)
00:05 12Mb 81.1% 2923 OTUs, 2662 chimeras (13.0%)
00:06 12Mb 93.7% 3213 OTUs, 3260 chimeras (13.8%)
00:06 13Mb 100.0% 3371 OTUs, 3505 chimeras (14.0%)
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading usearch_results/dairybreeds.otus1.fasta
00:00 2.5Mb 100.0% Reading usearch_results/dairybreeds.otus1.fasta
## 00:00 2.0Mb 0.1% Reading gold.fasta
00:00 35Mb 100.0% Reading gold.fasta
## 00:00 19Mb 0.1% Masking
00:01 19Mb 62.8% Masking
00:01 19Mb 100.0% Masking
## 00:01 20Mb 0.1% Word stats
00:01 20Mb 100.0% Word stats
## 00:01 20Mb 100.0% Alloc rows
## 00:01 20Mb 0.1% Build index
00:02 67Mb 25.1% Build index
00:02 76Mb 100.0% Build index
## 00:02 76Mb 0.0% Found 0/1 chimeras (0.0%), 0 not classified (0.0%)
00:03 82Mb 18.4% Found 15/621 chimeras (2.4%), 405 not classified (65.2%)
00:04 94Mb 59.0% Found 64/1989 chimeras (3.2%), 1527 not classified (76.8%)
00:04 104Mb 100.0% Found 110/3371 chimeras (3.3%), 2502 not classified (74.2%)
## 00:04 104Mb 0.0% Writing 3261 non-chimeras and unclassifieds
00:04 104Mb 100.0% Writing 3261 non-chimeras and unclassifieds
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading usearch_results/dairybreeds.otus2.fasta
00:00 2.4Mb 100.0% Reading usearch_results/dairybreeds.otus2.fasta
## 00:00 1.9Mb 0.1% Masking
00:00 1.9Mb 100.0% Masking
## 00:00 2.8Mb 0.1% Word stats
00:00 2.8Mb 100.0% Word stats
## 00:00 2.8Mb 100.0% Alloc rows
## 00:00 2.8Mb 0.1% Build index
00:00 4.4Mb 100.0% Build index
## 00:00 4.4Mb 0.1% Searching, 0.0% matched
00:01 39Mb 7.1% Searching, 86.4% matched
00:02 39Mb 15.5% Searching, 86.5% matched
00:03 39Mb 24.8% Searching, 86.5% matched
00:04 39Mb 34.8% Searching, 86.6% matched
00:05 39Mb 45.3% Searching, 86.8% matched
00:06 39Mb 55.7% Searching, 86.9% matched
00:07 39Mb 66.1% Searching, 86.9% matched
00:08 39Mb 76.5% Searching, 87.0% matched
00:09 39Mb 87.0% Searching, 87.0% matched
00:10 39Mb 97.8% Searching, 87.1% matched
00:10 39Mb 100.0% Searching, 87.1% matched
## usearch_results/dairybreeds.otu_map.uc 0.0%
usearch_results/dairybreeds.otu_map.uc 12.6%
usearch_results/dairybreeds.otu_map.uc 24.7%
usearch_results/dairybreeds.otu_map.uc 36.7%
usearch_results/dairybreeds.otu_map.uc 48.6%
usearch_results/dairybreeds.otu_map.uc 61.3%
usearch_results/dairybreeds.otu_map.uc 74.2%
usearch_results/dairybreeds.otu_map.uc 87.0%
usearch_results/dairybreeds.otu_map.uc 99.6%
usearch_results/dairybreeds.otu_map.uc 100.0%
Assign taxonomy to the OTU representative sequences:
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gg_13_5_otus.tar.gz
tar -zxvf gg_13_5_otus.tar.gz
assign_taxonomy.py -i usearch_results/dairybreeds.otus2.fasta -t gg_13_5_otus/97_otu_taxonomy.txt -r gg_13_5_otus/97_otus.fasta -o assigned_gg_taxa
## --2016-02-12 05:08:49-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gg_13_5_otus.tar.gz
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 30969823 (30M) [application/octet-stream]
## Saving to: 'gg_13_5_otus.tar.gz'
##
## 0K .......... .......... .......... .......... .......... 0% 363K 83s
## 50K .......... .......... .......... .......... .......... 0% 936K 58s
## 100K .......... .......... .......... .......... .......... 0% 1.92M 43s
## 150K .......... .......... .......... .......... .......... 0% 2.38M 36s
## 200K .......... .......... .......... .......... .......... 0% 1.53M 32s
## 250K .......... .......... .......... .......... .......... 0% 1.75M 30s
## 300K .......... .......... .......... .......... .......... 1% 2.05M 27s
## 350K .......... .......... .......... .......... .......... 1% 2.44M 25s
## 400K .......... .......... .......... .......... .......... 1% 1.32M 25s
## 450K .......... .......... .......... .......... .......... 1% 2.46M 24s
## 500K .......... .......... .......... .......... .......... 1% 1.83M 23s
## 550K .......... .......... .......... .......... .......... 1% 2.13M 22s
## 600K .......... .......... .......... .......... .......... 2% 1.76M 22s
## 650K .......... .......... .......... .......... .......... 2% 3.06M 21s
## 700K .......... .......... .......... .......... .......... 2% 2.77M 20s
## 750K .......... .......... .......... .......... .......... 2% 2.56M 19s
## 800K .......... .......... .......... .......... .......... 2% 1.73M 19s
## 850K .......... .......... .......... .......... .......... 2% 2.13M 19s
## 900K .......... .......... .......... .......... .......... 3% 2.08M 19s
## 950K .......... .......... .......... .......... .......... 3% 2.23M 18s
## 1000K .......... .......... .......... .......... .......... 3% 2.22M 18s
## 1050K .......... .......... .......... .......... .......... 3% 2.27M 18s
## 1100K .......... .......... .......... .......... .......... 3% 2.11M 17s
## 1150K .......... .......... .......... .......... .......... 3% 1.87M 17s
## 1200K .......... .......... .......... .......... .......... 4% 1.84M 17s
## 1250K .......... .......... .......... .......... .......... 4% 2.19M 17s
## 1300K .......... .......... .......... .......... .......... 4% 2.21M 17s
## 1350K .......... .......... .......... .......... .......... 4% 2.20M 17s
## 1400K .......... .......... .......... .......... .......... 4% 2.23M 17s
## 1450K .......... .......... .......... .......... .......... 4% 1.97M 16s
## 1500K .......... .......... .......... .......... .......... 5% 1.87M 16s
## 1550K .......... .......... .......... .......... .......... 5% 2.79M 16s
## 1600K .......... .......... .......... .......... .......... 5% 1.61M 16s
## 1650K .......... .......... .......... .......... .......... 5% 2.29M 16s
## 1700K .......... .......... .......... .......... .......... 5% 2.19M 16s
## 1750K .......... .......... .......... .......... .......... 5% 2.04M 16s
## 1800K .......... .......... .......... .......... .......... 6% 1.86M 16s
## 1850K .......... .......... .......... .......... .......... 6% 2.80M 16s
## 1900K .......... .......... .......... .......... .......... 6% 2.11M 15s
## 1950K .......... .......... .......... .......... .......... 6% 2.03M 15s
## 2000K .......... .......... .......... .......... .......... 6% 1.75M 15s
## 2050K .......... .......... .......... .......... .......... 6% 2.14M 15s
## 2100K .......... .......... .......... .......... .......... 7% 2.22M 15s
## 2150K .......... .......... .......... .......... .......... 7% 1.77M 15s
## 2200K .......... .......... .......... .......... .......... 7% 2.51M 15s
## 2250K .......... .......... .......... .......... .......... 7% 2.20M 15s
## 2300K .......... .......... .......... .......... .......... 7% 2.31M 15s
## 2350K .......... .......... .......... .......... .......... 7% 2.20M 15s
## 2400K .......... .......... .......... .......... .......... 8% 1.64M 15s
## 2450K .......... .......... .......... .......... .......... 8% 2.20M 15s
## 2500K .......... .......... .......... .......... .......... 8% 2.09M 15s
## 2550K .......... .......... .......... .......... .......... 8% 2.20M 15s
## 2600K .......... .......... .......... .......... .......... 8% 1.92M 15s
## 2650K .......... .......... .......... .......... .......... 8% 2.51M 14s
## 2700K .......... .......... .......... .......... .......... 9% 2.04M 14s
## 2750K .......... .......... .......... .......... .......... 9% 2.41M 14s
## 2800K .......... .......... .......... .......... .......... 9% 1.44M 14s
## 2850K .......... .......... .......... .......... .......... 9% 2.47M 14s
## 2900K .......... .......... .......... .......... .......... 9% 1.76M 14s
## 2950K .......... .......... .......... .......... .......... 9% 2.90M 14s
## 3000K .......... .......... .......... .......... .......... 10% 2.27M 14s
## 3050K .......... .......... .......... .......... .......... 10% 2.07M 14s
## 3100K .......... .......... .......... .......... .......... 10% 2.33M 14s
## 3150K .......... .......... .......... .......... .......... 10% 2.07M 14s
## 3200K .......... .......... .......... .......... .......... 10% 1.48M 14s
## 3250K .......... .......... .......... .......... .......... 10% 2.30M 14s
## 3300K .......... .......... .......... .......... .......... 11% 2.40M 14s
## 3350K .......... .......... .......... .......... .......... 11% 2.13M 14s
## 3400K .......... .......... .......... .......... .......... 11% 2.24M 14s
## 3450K .......... .......... .......... .......... .......... 11% 2.27M 14s
## 3500K .......... .......... .......... .......... .......... 11% 2.14M 14s
## 3550K .......... .......... .......... .......... .......... 11% 2.23M 14s
## 3600K .......... .......... .......... .......... .......... 12% 1.66M 14s
## 3650K .......... .......... .......... .......... .......... 12% 2.03M 14s
## 3700K .......... .......... .......... .......... .......... 12% 2.18M 13s
## 3750K .......... .......... .......... .......... .......... 12% 2.16M 13s
## 3800K .......... .......... .......... .......... .......... 12% 2.21M 13s
## 3850K .......... .......... .......... .......... .......... 12% 2.22M 13s
## 3900K .......... .......... .......... .......... .......... 13% 2.16M 13s
## 3950K .......... .......... .......... .......... .......... 13% 2.06M 13s
## 4000K .......... .......... .......... .......... .......... 13% 1.27M 13s
## 4050K .......... .......... .......... .......... .......... 13% 2.11M 13s
## 4100K .......... .......... .......... .......... .......... 13% 2.67M 13s
## 4150K .......... .......... .......... .......... .......... 13% 2.39M 13s
## 4200K .......... .......... .......... .......... .......... 14% 2.59M 13s
## 4250K .......... .......... .......... .......... .......... 14% 1.97M 13s
## 4300K .......... .......... .......... .......... .......... 14% 2.02M 13s
## 4350K .......... .......... .......... .......... .......... 14% 1.41M 13s
## 4400K .......... .......... .......... .......... .......... 14% 1.46M 13s
## 4450K .......... .......... .......... .......... .......... 14% 1.68M 13s
## 4500K .......... .......... .......... .......... .......... 15% 2.88M 13s
## 4550K .......... .......... .......... .......... .......... 15% 2.03M 13s
## 4600K .......... .......... .......... .......... .......... 15% 4.17M 13s
## 4650K .......... .......... .......... .......... .......... 15% 1.94M 13s
## 4700K .......... .......... .......... .......... .......... 15% 4.26M 13s
## 4750K .......... .......... .......... .......... .......... 15% 2.44M 13s
## 4800K .......... .......... .......... .......... .......... 16% 823K 13s
## 4850K .......... .......... .......... .......... .......... 16% 2.66M 13s
## 4900K .......... .......... .......... .......... .......... 16% 2.16M 13s
## 4950K .......... .......... .......... .......... .......... 16% 1.98M 13s
## 5000K .......... .......... .......... .......... .......... 16% 2.31M 13s
## 5050K .......... .......... .......... .......... .......... 16% 2.02M 13s
## 5100K .......... .......... .......... .......... .......... 17% 2.38M 13s
## 5150K .......... .......... .......... .......... .......... 17% 2.34M 13s
## 5200K .......... .......... .......... .......... .......... 17% 1.56M 13s
## 5250K .......... .......... .......... .......... .......... 17% 1.99M 13s
## 5300K .......... .......... .......... .......... .......... 17% 2.58M 12s
## 5350K .......... .......... .......... .......... .......... 17% 2.13M 12s
## 5400K .......... .......... .......... .......... .......... 18% 2.13M 12s
## 5450K .......... .......... .......... .......... .......... 18% 2.24M 12s
## 5500K .......... .......... .......... .......... .......... 18% 1.76M 12s
## 5550K .......... .......... .......... .......... .......... 18% 2.61M 12s
## 5600K .......... .......... .......... .......... .......... 18% 1.58M 12s
## 5650K .......... .......... .......... .......... .......... 18% 2.41M 12s
## 5700K .......... .......... .......... .......... .......... 19% 2.06M 12s
## 5750K .......... .......... .......... .......... .......... 19% 2.43M 12s
## 5800K .......... .......... .......... .......... .......... 19% 2.09M 12s
## 5850K .......... .......... .......... .......... .......... 19% 2.25M 12s
## 5900K .......... .......... .......... .......... .......... 19% 2.12M 12s
## 5950K .......... .......... .......... .......... .......... 19% 1.94M 12s
## 6000K .......... .......... .......... .......... .......... 20% 1.83M 12s
## 6050K .......... .......... .......... .......... .......... 20% 2.05M 12s
## 6100K .......... .......... .......... .......... .......... 20% 2.32M 12s
## 6150K .......... .......... .......... .......... .......... 20% 2.09M 12s
## 6200K .......... .......... .......... .......... .......... 20% 2.28M 12s
## 6250K .......... .......... .......... .......... .......... 20% 2.13M 12s
## 6300K .......... .......... .......... .......... .......... 20% 2.21M 12s
## 6350K .......... .......... .......... .......... .......... 21% 1.65M 12s
## 6400K .......... .......... .......... .......... .......... 21% 1.93M 12s
## 6450K .......... .......... .......... .......... .......... 21% 2.45M 12s
## 6500K .......... .......... .......... .......... .......... 21% 2.19M 12s
## 6550K .......... .......... .......... .......... .......... 21% 2.16M 12s
## 6600K .......... .......... .......... .......... .......... 21% 2.05M 12s
## 6650K .......... .......... .......... .......... .......... 22% 2.17M 12s
## 6700K .......... .......... .......... .......... .......... 22% 1.97M 12s
## 6750K .......... .......... .......... .......... .......... 22% 2.62M 12s
## 6800K .......... .......... .......... .......... .......... 22% 1.64M 12s
## 6850K .......... .......... .......... .......... .......... 22% 2.13M 12s
## 6900K .......... .......... .......... .......... .......... 22% 2.28M 12s
## 6950K .......... .......... .......... .......... .......... 23% 2.13M 11s
## 7000K .......... .......... .......... .......... .......... 23% 1.81M 11s
## 7050K .......... .......... .......... .......... .......... 23% 2.65M 11s
## 7100K .......... .......... .......... .......... .......... 23% 2.03M 11s
## 7150K .......... .......... .......... .......... .......... 23% 2.17M 11s
## 7200K .......... .......... .......... .......... .......... 23% 1.73M 11s
## 7250K .......... .......... .......... .......... .......... 24% 2.20M 11s
## 7300K .......... .......... .......... .......... .......... 24% 1.99M 11s
## 7350K .......... .......... .......... .......... .......... 24% 2.00M 11s
## 7400K .......... .......... .......... .......... .......... 24% 2.20M 11s
## 7450K .......... .......... .......... .......... .......... 24% 2.43M 11s
## 7500K .......... .......... .......... .......... .......... 24% 2.28M 11s
## 7550K .......... .......... .......... .......... .......... 25% 2.21M 11s
## 7600K .......... .......... .......... .......... .......... 25% 1.47M 11s
## 7650K .......... .......... .......... .......... .......... 25% 2.18M 11s
## 7700K .......... .......... .......... .......... .......... 25% 2.63M 11s
## 7750K .......... .......... .......... .......... .......... 25% 2.07M 11s
## 7800K .......... .......... .......... .......... .......... 25% 2.04M 11s
## 7850K .......... .......... .......... .......... .......... 26% 2.37M 11s
## 7900K .......... .......... .......... .......... .......... 26% 2.15M 11s
## 7950K .......... .......... .......... .......... .......... 26% 2.24M 11s
## 8000K .......... .......... .......... .......... .......... 26% 1.41M 11s
## 8050K .......... .......... .......... .......... .......... 26% 2.59M 11s
## 8100K .......... .......... .......... .......... .......... 26% 2.08M 11s
## 8150K .......... .......... .......... .......... .......... 27% 2.40M 11s
## 8200K .......... .......... .......... .......... .......... 27% 2.15M 11s
## 8250K .......... .......... .......... .......... .......... 27% 2.12M 11s
## 8300K .......... .......... .......... .......... .......... 27% 2.25M 11s
## 8350K .......... .......... .......... .......... .......... 27% 2.23M 11s
## 8400K .......... .......... .......... .......... .......... 27% 1.53M 11s
## 8450K .......... .......... .......... .......... .......... 28% 2.21M 11s
## 8500K .......... .......... .......... .......... .......... 28% 2.30M 11s
## 8550K .......... .......... .......... .......... .......... 28% 2.04M 11s
## 8600K .......... .......... .......... .......... .......... 28% 2.29M 11s
## 8650K .......... .......... .......... .......... .......... 28% 2.21M 11s
## 8700K .......... .......... .......... .......... .......... 28% 2.20M 10s
## 8750K .......... .......... .......... .......... .......... 29% 2.19M 10s
## 8800K .......... .......... .......... .......... .......... 29% 1.65M 10s
## 8850K .......... .......... .......... .......... .......... 29% 1.37M 10s
## 8900K .......... .......... .......... .......... .......... 29% 3.20M 10s
## 8950K .......... .......... .......... .......... .......... 29% 2.84M 10s
## 9000K .......... .......... .......... .......... .......... 29% 2.22M 10s
## 9050K .......... .......... .......... .......... .......... 30% 1.21M 10s
## 9100K .......... .......... .......... .......... .......... 30% 620K 10s
## 9150K .......... .......... .......... .......... .......... 30% 1.47M 10s
## 9200K .......... .......... .......... .......... .......... 30% 2.19M 10s
## 9250K .......... .......... .......... .......... .......... 30% 2.77M 10s
## 9300K .......... .......... .......... .......... .......... 30% 2.37M 10s
## 9350K .......... .......... .......... .......... .......... 31% 2.47M 10s
## 9400K .......... .......... .......... .......... .......... 31% 2.83M 10s
## 9450K .......... .......... .......... .......... .......... 31% 2.43M 10s
## 9500K .......... .......... .......... .......... .......... 31% 1.54M 10s
## 9550K .......... .......... .......... .......... .......... 31% 2.78M 10s
## 9600K .......... .......... .......... .......... .......... 31% 2.48M 10s
## 9650K .......... .......... .......... .......... .......... 32% 3.96M 10s
## 9700K .......... .......... .......... .......... .......... 32% 1.55M 10s
## 9750K .......... .......... .......... .......... .......... 32% 1.72M 10s
## 9800K .......... .......... .......... .......... .......... 32% 3.83M 10s
## 9850K .......... .......... .......... .......... .......... 32% 4.23M 10s
## 9900K .......... .......... .......... .......... .......... 32% 4.12M 10s
## 9950K .......... .......... .......... .......... .......... 33% 2.14M 10s
## 10000K .......... .......... .......... .......... .......... 33% 2.21M 10s
## 10050K .......... .......... .......... .......... .......... 33% 5.35M 10s
## 10100K .......... .......... .......... .......... .......... 33% 3.85M 10s
## 10150K .......... .......... .......... .......... .......... 33% 2.18M 10s
## 10200K .......... .......... .......... .......... .......... 33% 1.68M 10s
## 10250K .......... .......... .......... .......... .......... 34% 2.93M 10s
## 10300K .......... .......... .......... .......... .......... 34% 2.19M 10s
## 10350K .......... .......... .......... .......... .......... 34% 2.28M 10s
## 10400K .......... .......... .......... .......... .......... 34% 1.63M 10s
## 10450K .......... .......... .......... .......... .......... 34% 2.21M 10s
## 10500K .......... .......... .......... .......... .......... 34% 1.66M 10s
## 10550K .......... .......... .......... .......... .......... 35% 3.03M 10s
## 10600K .......... .......... .......... .......... .......... 35% 2.18M 9s
## 10650K .......... .......... .......... .......... .......... 35% 2.05M 9s
## 10700K .......... .......... .......... .......... .......... 35% 2.21M 9s
## 10750K .......... .......... .......... .......... .......... 35% 2.30M 9s
## 10800K .......... .......... .......... .......... .......... 35% 1.64M 9s
## 10850K .......... .......... .......... .......... .......... 36% 1.79M 9s
## 10900K .......... .......... .......... .......... .......... 36% 2.78M 9s
## 10950K .......... .......... .......... .......... .......... 36% 1.94M 9s
## 11000K .......... .......... .......... .......... .......... 36% 2.50M 9s
## 11050K .......... .......... .......... .......... .......... 36% 2.07M 9s
## 11100K .......... .......... .......... .......... .......... 36% 2.21M 9s
## 11150K .......... .......... .......... .......... .......... 37% 2.19M 9s
## 11200K .......... .......... .......... .......... .......... 37% 1.64M 9s
## 11250K .......... .......... .......... .......... .......... 37% 2.21M 9s
## 11300K .......... .......... .......... .......... .......... 37% 2.07M 9s
## 11350K .......... .......... .......... .......... .......... 37% 2.29M 9s
## 11400K .......... .......... .......... .......... .......... 37% 2.15M 9s
## 11450K .......... .......... .......... .......... .......... 38% 2.23M 9s
## 11500K .......... .......... .......... .......... .......... 38% 2.16M 9s
## 11550K .......... .......... .......... .......... .......... 38% 2.15M 9s
## 11600K .......... .......... .......... .......... .......... 38% 1.49M 9s
## 11650K .......... .......... .......... .......... .......... 38% 2.51M 9s
## 11700K .......... .......... .......... .......... .......... 38% 2.17M 9s
## 11750K .......... .......... .......... .......... .......... 39% 2.12M 9s
## 11800K .......... .......... .......... .......... .......... 39% 2.11M 9s
## 11850K .......... .......... .......... .......... .......... 39% 2.22M 9s
## 11900K .......... .......... .......... .......... .......... 39% 2.21M 9s
## 11950K .......... .......... .......... .......... .......... 39% 2.25M 9s
## 12000K .......... .......... .......... .......... .......... 39% 1.55M 9s
## 12050K .......... .......... .......... .......... .......... 40% 2.20M 9s
## 12100K .......... .......... .......... .......... .......... 40% 2.24M 9s
## 12150K .......... .......... .......... .......... .......... 40% 2.13M 9s
## 12200K .......... .......... .......... .......... .......... 40% 2.23M 9s
## 12250K .......... .......... .......... .......... .......... 40% 2.07M 9s
## 12300K .......... .......... .......... .......... .......... 40% 2.27M 9s
## 12350K .......... .......... .......... .......... .......... 40% 2.03M 9s
## 12400K .......... .......... .......... .......... .......... 41% 1.73M 9s
## 12450K .......... .......... .......... .......... .......... 41% 2.18M 9s
## 12500K .......... .......... .......... .......... .......... 41% 2.13M 9s
## 12550K .......... .......... .......... .......... .......... 41% 2.30M 9s
## 12600K .......... .......... .......... .......... .......... 41% 2.05M 8s
## 12650K .......... .......... .......... .......... .......... 41% 2.25M 8s
## 12700K .......... .......... .......... .......... .......... 42% 1.64M 8s
## 12750K .......... .......... .......... .......... .......... 42% 2.45M 8s
## 12800K .......... .......... .......... .......... .......... 42% 1.97M 8s
## 12850K .......... .......... .......... .......... .......... 42% 2.11M 8s
## 12900K .......... .......... .......... .......... .......... 42% 2.26M 8s
## 12950K .......... .......... .......... .......... .......... 42% 2.08M 8s
## 13000K .......... .......... .......... .......... .......... 43% 2.11M 8s
## 13050K .......... .......... .......... .......... .......... 43% 1.97M 8s
## 13100K .......... .......... .......... .......... .......... 43% 2.49M 8s
## 13150K .......... .......... .......... .......... .......... 43% 2.08M 8s
## 13200K .......... .......... .......... .......... .......... 43% 1.74M 8s
## 13250K .......... .......... .......... .......... .......... 43% 2.21M 8s
## 13300K .......... .......... .......... .......... .......... 44% 2.06M 8s
## 13350K .......... .......... .......... .......... .......... 44% 1.82M 8s
## 13400K .......... .......... .......... .......... .......... 44% 2.77M 8s
## 13450K .......... .......... .......... .......... .......... 44% 2.27M 8s
## 13500K .......... .......... .......... .......... .......... 44% 1.95M 8s
## 13550K .......... .......... .......... .......... .......... 44% 2.37M 8s
## 13600K .......... .......... .......... .......... .......... 45% 1.61M 8s
## 13650K .......... .......... .......... .......... .......... 45% 2.09M 8s
## 13700K .......... .......... .......... .......... .......... 45% 1.96M 8s
## 13750K .......... .......... .......... .......... .......... 45% 2.58M 8s
## 13800K .......... .......... .......... .......... .......... 45% 1.90M 8s
## 13850K .......... .......... .......... .......... .......... 45% 2.55M 8s
## 13900K .......... .......... .......... .......... .......... 46% 2.16M 8s
## 13950K .......... .......... .......... .......... .......... 46% 2.30M 8s
## 14000K .......... .......... .......... .......... .......... 46% 1.42M 8s
## 14050K .......... .......... .......... .......... .......... 46% 2.64M 8s
## 14100K .......... .......... .......... .......... .......... 46% 2.18M 8s
## 14150K .......... .......... .......... .......... .......... 46% 2.09M 8s
## 14200K .......... .......... .......... .......... .......... 47% 2.16M 8s
## 14250K .......... .......... .......... .......... .......... 47% 2.22M 8s
## 14300K .......... .......... .......... .......... .......... 47% 2.21M 8s
## 14350K .......... .......... .......... .......... .......... 47% 2.30M 8s
## 14400K .......... .......... .......... .......... .......... 47% 1.58M 8s
## 14450K .......... .......... .......... .......... .......... 47% 2.01M 8s
## 14500K .......... .......... .......... .......... .......... 48% 2.39M 8s
## 14550K .......... .......... .......... .......... .......... 48% 1.90M 8s
## 14600K .......... .......... .......... .......... .......... 48% 2.58M 7s
## 14650K .......... .......... .......... .......... .......... 48% 2.11M 7s
## 14700K .......... .......... .......... .......... .......... 48% 2.20M 7s
## 14750K .......... .......... .......... .......... .......... 48% 2.17M 7s
## 14800K .......... .......... .......... .......... .......... 49% 1.54M 7s
## 14850K .......... .......... .......... .......... .......... 49% 2.25M 7s
## 14900K .......... .......... .......... .......... .......... 49% 2.21M 7s
## 14950K .......... .......... .......... .......... .......... 49% 2.32M 7s
## 15000K .......... .......... .......... .......... .......... 49% 2.12M 7s
## 15050K .......... .......... .......... .......... .......... 49% 2.17M 7s
## 15100K .......... .......... .......... .......... .......... 50% 2.14M 7s
## 15150K .......... .......... .......... .......... .......... 50% 2.21M 7s
## 15200K .......... .......... .......... .......... .......... 50% 1.64M 7s
## 15250K .......... .......... .......... .......... .......... 50% 2.16M 7s
## 15300K .......... .......... .......... .......... .......... 50% 2.25M 7s
## 15350K .......... .......... .......... .......... .......... 50% 2.17M 7s
## 15400K .......... .......... .......... .......... .......... 51% 2.13M 7s
## 15450K .......... .......... .......... .......... .......... 51% 2.16M 7s
## 15500K .......... .......... .......... .......... .......... 51% 2.24M 7s
## 15550K .......... .......... .......... .......... .......... 51% 1.77M 7s
## 15600K .......... .......... .......... .......... .......... 51% 1.98M 7s
## 15650K .......... .......... .......... .......... .......... 51% 2.14M 7s
## 15700K .......... .......... .......... .......... .......... 52% 2.18M 7s
## 15750K .......... .......... .......... .......... .......... 52% 2.21M 7s
## 15800K .......... .......... .......... .......... .......... 52% 2.12M 7s
## 15850K .......... .......... .......... .......... .......... 52% 1.54M 7s
## 15900K .......... .......... .......... .......... .......... 52% 2.63M 7s
## 15950K .......... .......... .......... .......... .......... 52% 1.02M 7s
## 16000K .......... .......... .......... .......... .......... 53% 1.93M 7s
## 16050K .......... .......... .......... .......... .......... 53% 5.05M 7s
## 16100K .......... .......... .......... .......... .......... 53% 2.62M 7s
## 16150K .......... .......... .......... .......... .......... 53% 2.41M 7s
## 16200K .......... .......... .......... .......... .......... 53% 1.93M 7s
## 16250K .......... .......... .......... .......... .......... 53% 2.42M 7s
## 16300K .......... .......... .......... .......... .......... 54% 2.14M 7s
## 16350K .......... .......... .......... .......... .......... 54% 2.14M 7s
## 16400K .......... .......... .......... .......... .......... 54% 1.53M 7s
## 16450K .......... .......... .......... .......... .......... 54% 2.53M 7s
## 16500K .......... .......... .......... .......... .......... 54% 2.13M 7s
## 16550K .......... .......... .......... .......... .......... 54% 2.20M 7s
## 16600K .......... .......... .......... .......... .......... 55% 2.14M 7s
## 16650K .......... .......... .......... .......... .......... 55% 2.17M 6s
## 16700K .......... .......... .......... .......... .......... 55% 2.19M 6s
## 16750K .......... .......... .......... .......... .......... 55% 2.15M 6s
## 16800K .......... .......... .......... .......... .......... 55% 1.58M 6s
## 16850K .......... .......... .......... .......... .......... 55% 2.22M 6s
## 16900K .......... .......... .......... .......... .......... 56% 2.30M 6s
## 16950K .......... .......... .......... .......... .......... 56% 2.16M 6s
## 17000K .......... .......... .......... .......... .......... 56% 2.19M 6s
## 17050K .......... .......... .......... .......... .......... 56% 2.10M 6s
## 17100K .......... .......... .......... .......... .......... 56% 2.27M 6s
## 17150K .......... .......... .......... .......... .......... 56% 2.02M 6s
## 17200K .......... .......... .......... .......... .......... 57% 1.64M 6s
## 17250K .......... .......... .......... .......... .......... 57% 2.36M 6s
## 17300K .......... .......... .......... .......... .......... 57% 2.12M 6s
## 17350K .......... .......... .......... .......... .......... 57% 2.23M 6s
## 17400K .......... .......... .......... .......... .......... 57% 2.13M 6s
## 17450K .......... .......... .......... .......... .......... 57% 2.17M 6s
## 17500K .......... .......... .......... .......... .......... 58% 1.82M 6s
## 17550K .......... .......... .......... .......... .......... 58% 2.80M 6s
## 17600K .......... .......... .......... .......... .......... 58% 1.60M 6s
## 17650K .......... .......... .......... .......... .......... 58% 2.19M 6s
## 17700K .......... .......... .......... .......... .......... 58% 2.16M 6s
## 17750K .......... .......... .......... .......... .......... 58% 2.20M 6s
## 17800K .......... .......... .......... .......... .......... 59% 2.20M 6s
## 17850K .......... .......... .......... .......... .......... 59% 1.78M 6s
## 17900K .......... .......... .......... .......... .......... 59% 2.68M 6s
## 17950K .......... .......... .......... .......... .......... 59% 1.86M 6s
## 18000K .......... .......... .......... .......... .......... 59% 1.88M 6s
## 18050K .......... .......... .......... .......... .......... 59% 2.25M 6s
## 18100K .......... .......... .......... .......... .......... 60% 2.12M 6s
## 18150K .......... .......... .......... .......... .......... 60% 2.24M 6s
## 18200K .......... .......... .......... .......... .......... 60% 2.05M 6s
## 18250K .......... .......... .......... .......... .......... 60% 2.13M 6s
## 18300K .......... .......... .......... .......... .......... 60% 2.01M 6s
## 18350K .......... .......... .......... .......... .......... 60% 2.36M 6s
## 18400K .......... .......... .......... .......... .......... 61% 1.71M 6s
## 18450K .......... .......... .......... .......... .......... 61% 2.17M 6s
## 18500K .......... .......... .......... .......... .......... 61% 1.86M 6s
## 18550K .......... .......... .......... .......... .......... 61% 2.54M 6s
## 18600K .......... .......... .......... .......... .......... 61% 1.96M 6s
## 18650K .......... .......... .......... .......... .......... 61% 2.50M 6s
## 18700K .......... .......... .......... .......... .......... 61% 2.18M 5s
## 18750K .......... .......... .......... .......... .......... 62% 2.06M 5s
## 18800K .......... .......... .......... .......... .......... 62% 1.43M 5s
## 18850K .......... .......... .......... .......... .......... 62% 3.05M 5s
## 18900K .......... .......... .......... .......... .......... 62% 2.04M 5s
## 18950K .......... .......... .......... .......... .......... 62% 2.04M 5s
## 19000K .......... .......... .......... .......... .......... 62% 2.31M 5s
## 19050K .......... .......... .......... .......... .......... 63% 2.22M 5s
## 19100K .......... .......... .......... .......... .......... 63% 2.22M 5s
## 19150K .......... .......... .......... .......... .......... 63% 2.24M 5s
## 19200K .......... .......... .......... .......... .......... 63% 1.62M 5s
## 19250K .......... .......... .......... .......... .......... 63% 1.82M 5s
## 19300K .......... .......... .......... .......... .......... 63% 2.65M 5s
## 19350K .......... .......... .......... .......... .......... 64% 2.16M 5s
## 19400K .......... .......... .......... .......... .......... 64% 2.13M 5s
## 19450K .......... .......... .......... .......... .......... 64% 2.23M 5s
## 19500K .......... .......... .......... .......... .......... 64% 2.20M 5s
## 19550K .......... .......... .......... .......... .......... 64% 2.15M 5s
## 19600K .......... .......... .......... .......... .......... 64% 1.51M 5s
## 19650K .......... .......... .......... .......... .......... 65% 2.48M 5s
## 19700K .......... .......... .......... .......... .......... 65% 2.04M 5s
## 19750K .......... .......... .......... .......... .......... 65% 2.36M 5s
## 19800K .......... .......... .......... .......... .......... 65% 2.11M 5s
## 19850K .......... .......... .......... .......... .......... 65% 2.21M 5s
## 19900K .......... .......... .......... .......... .......... 65% 2.13M 5s
## 19950K .......... .......... .......... .......... .......... 66% 2.25M 5s
## 20000K .......... .......... .......... .......... .......... 66% 1.63M 5s
## 20050K .......... .......... .......... .......... .......... 66% 2.05M 5s
## 20100K .......... .......... .......... .......... .......... 66% 2.17M 5s
## 20150K .......... .......... .......... .......... .......... 66% 2.24M 5s
## 20200K .......... .......... .......... .......... .......... 66% 2.18M 5s
## 20250K .......... .......... .......... .......... .......... 67% 2.10M 5s
## 20300K .......... .......... .......... .......... .......... 67% 2.05M 5s
## 20350K .......... .......... .......... .......... .......... 67% 1.90M 5s
## 20400K .......... .......... .......... .......... .......... 67% 1.92M 5s
## 20450K .......... .......... .......... .......... .......... 67% 2.23M 5s
## 20500K .......... .......... .......... .......... .......... 67% 2.11M 5s
## 20550K .......... .......... .......... .......... .......... 68% 2.20M 5s
## 20600K .......... .......... .......... .......... .......... 68% 2.14M 5s
## 20650K .......... .......... .......... .......... .......... 68% 2.12M 5s
## 20700K .......... .......... .......... .......... .......... 68% 1.94M 5s
## 20750K .......... .......... .......... .......... .......... 68% 2.21M 5s
## 20800K .......... .......... .......... .......... .......... 68% 1.75M 4s
## 20850K .......... .......... .......... .......... .......... 69% 2.25M 4s
## 20900K .......... .......... .......... .......... .......... 69% 2.25M 4s
## 20950K .......... .......... .......... .......... .......... 69% 1.99M 4s
## 21000K .......... .......... .......... .......... .......... 69% 1.91M 4s
## 21050K .......... .......... .......... .......... .......... 69% 2.85M 4s
## 21100K .......... .......... .......... .......... .......... 69% 2.10M 4s
## 21150K .......... .......... .......... .......... .......... 70% 2.02M 4s
## 21200K .......... .......... .......... .......... .......... 70% 1.77M 4s
## 21250K .......... .......... .......... .......... .......... 70% 2.13M 4s
## 21300K .......... .......... .......... .......... .......... 70% 2.12M 4s
## 21350K .......... .......... .......... .......... .......... 70% 1.93M 4s
## 21400K .......... .......... .......... .......... .......... 70% 2.65M 4s
## 21450K .......... .......... .......... .......... .......... 71% 2.02M 4s
## 21500K .......... .......... .......... .......... .......... 71% 2.25M 4s
## 21550K .......... .......... .......... .......... .......... 71% 2.25M 4s
## 21600K .......... .......... .......... .......... .......... 71% 1.69M 4s
## 21650K .......... .......... .......... .......... .......... 71% 1.82M 4s
## 21700K .......... .......... .......... .......... .......... 71% 2.66M 4s
## 21750K .......... .......... .......... .......... .......... 72% 2.11M 4s
## 21800K .......... .......... .......... .......... .......... 72% 1.98M 4s
## 21850K .......... .......... .......... .......... .......... 72% 2.38M 4s
## 21900K .......... .......... .......... .......... .......... 72% 1.98M 4s
## 21950K .......... .......... .......... .......... .......... 72% 2.46M 4s
## 22000K .......... .......... .......... .......... .......... 72% 1.43M 4s
## 22050K .......... .......... .......... .......... .......... 73% 2.68M 4s
## 22100K .......... .......... .......... .......... .......... 73% 1.95M 4s
## 22150K .......... .......... .......... .......... .......... 73% 2.20M 4s
## 22200K .......... .......... .......... .......... .......... 73% 1.65M 4s
## 22250K .......... .......... .......... .......... .......... 73% 3.06M 4s
## 22300K .......... .......... .......... .......... .......... 73% 2.66M 4s
## 22350K .......... .......... .......... .......... .......... 74% 2.15M 4s
## 22400K .......... .......... .......... .......... .......... 74% 1.63M 4s
## 22450K .......... .......... .......... .......... .......... 74% 2.03M 4s
## 22500K .......... .......... .......... .......... .......... 74% 2.43M 4s
## 22550K .......... .......... .......... .......... .......... 74% 2.00M 4s
## 22600K .......... .......... .......... .......... .......... 74% 2.28M 4s
## 22650K .......... .......... .......... .......... .......... 75% 2.21M 4s
## 22700K .......... .......... .......... .......... .......... 75% 2.18M 4s
## 22750K .......... .......... .......... .......... .......... 75% 1.95M 4s
## 22800K .......... .......... .......... .......... .......... 75% 1.62M 4s
## 22850K .......... .......... .......... .......... .......... 75% 2.58M 3s
## 22900K .......... .......... .......... .......... .......... 75% 2.05M 3s
## 22950K .......... .......... .......... .......... .......... 76% 2.10M 3s
## 23000K .......... .......... .......... .......... .......... 76% 2.42M 3s
## 23050K .......... .......... .......... .......... .......... 76% 2.15M 3s
## 23100K .......... .......... .......... .......... .......... 76% 2.19M 3s
## 23150K .......... .......... .......... .......... .......... 76% 2.21M 3s
## 23200K .......... .......... .......... .......... .......... 76% 1.54M 3s
## 23250K .......... .......... .......... .......... .......... 77% 2.34M 3s
## 23300K .......... .......... .......... .......... .......... 77% 2.12M 3s
## 23350K .......... .......... .......... .......... .......... 77% 2.27M 3s
## 23400K .......... .......... .......... .......... .......... 77% 2.16M 3s
## 23450K .......... .......... .......... .......... .......... 77% 2.14M 3s
## 23500K .......... .......... .......... .......... .......... 77% 2.02M 3s
## 23550K .......... .......... .......... .......... .......... 78% 2.02M 3s
## 23600K .......... .......... .......... .......... .......... 78% 1.87M 3s
## 23650K .......... .......... .......... .......... .......... 78% 2.13M 3s
## 23700K .......... .......... .......... .......... .......... 78% 2.23M 3s
## 23750K .......... .......... .......... .......... .......... 78% 2.11M 3s
## 23800K .......... .......... .......... .......... .......... 78% 2.25M 3s
## 23850K .......... .......... .......... .......... .......... 79% 1.75M 3s
## 23900K .......... .......... .......... .......... .......... 79% 2.89M 3s
## 23950K .......... .......... .......... .......... .......... 79% 2.07M 3s
## 24000K .......... .......... .......... .......... .......... 79% 1.71M 3s
## 24050K .......... .......... .......... .......... .......... 79% 2.13M 3s
## 24100K .......... .......... .......... .......... .......... 79% 2.14M 3s
## 24150K .......... .......... .......... .......... .......... 80% 2.20M 3s
## 24200K .......... .......... .......... .......... .......... 80% 1.81M 3s
## 24250K .......... .......... .......... .......... .......... 80% 2.72M 3s
## 24300K .......... .......... .......... .......... .......... 80% 1.89M 3s
## 24350K .......... .......... .......... .......... .......... 80% 2.55M 3s
## 24400K .......... .......... .......... .......... .......... 80% 1.63M 3s
## 24450K .......... .......... .......... .......... .......... 81% 2.15M 3s
## 24500K .......... .......... .......... .......... .......... 81% 1.78M 3s
## 24550K .......... .......... .......... .......... .......... 81% 2.68M 3s
## 24600K .......... .......... .......... .......... .......... 81% 2.04M 3s
## 24650K .......... .......... .......... .......... .......... 81% 2.06M 3s
## 24700K .......... .......... .......... .......... .......... 81% 2.31M 3s
## 24750K .......... .......... .......... .......... .......... 81% 2.22M 3s
## 24800K .......... .......... .......... .......... .......... 82% 1.57M 3s
## 24850K .......... .......... .......... .......... .......... 82% 2.04M 3s
## 24900K .......... .......... .......... .......... .......... 82% 2.58M 3s
## 24950K .......... .......... .......... .......... .......... 82% 2.02M 2s
## 25000K .......... .......... .......... .......... .......... 82% 2.32M 2s
## 25050K .......... .......... .......... .......... .......... 82% 2.15M 2s
## 25100K .......... .......... .......... .......... .......... 83% 2.19M 2s
## 25150K .......... .......... .......... .......... .......... 83% 2.25M 2s
## 25200K .......... .......... .......... .......... .......... 83% 1.64M 2s
## 25250K .......... .......... .......... .......... .......... 83% 2.13M 2s
## 25300K .......... .......... .......... .......... .......... 83% 2.07M 2s
## 25350K .......... .......... .......... .......... .......... 83% 2.32M 2s
## 25400K .......... .......... .......... .......... .......... 84% 2.15M 2s
## 25450K .......... .......... .......... .......... .......... 84% 2.21M 2s
## 25500K .......... .......... .......... .......... .......... 84% 2.18M 2s
## 25550K .......... .......... .......... .......... .......... 84% 2.15M 2s
## 25600K .......... .......... .......... .......... .......... 84% 1.42M 2s
## 25650K .......... .......... .......... .......... .......... 84% 2.77M 2s
## 25700K .......... .......... .......... .......... .......... 85% 2.17M 2s
## 25750K .......... .......... .......... .......... .......... 85% 2.01M 2s
## 25800K .......... .......... .......... .......... .......... 85% 2.34M 2s
## 25850K .......... .......... .......... .......... .......... 85% 2.03M 2s
## 25900K .......... .......... .......... .......... .......... 85% 2.30M 2s
## 25950K .......... .......... .......... .......... .......... 85% 2.17M 2s
## 26000K .......... .......... .......... .......... .......... 86% 1.68M 2s
## 26050K .......... .......... .......... .......... .......... 86% 1.83M 2s
## 26100K .......... .......... .......... .......... .......... 86% 2.61M 2s
## 26150K .......... .......... .......... .......... .......... 86% 2.14M 2s
## 26200K .......... .......... .......... .......... .......... 86% 2.15M 2s
## 26250K .......... .......... .......... .......... .......... 86% 2.19M 2s
## 26300K .......... .......... .......... .......... .......... 87% 2.21M 2s
## 26350K .......... .......... .......... .......... .......... 87% 2.08M 2s
## 26400K .......... .......... .......... .......... .......... 87% 1.63M 2s
## 26450K .......... .......... .......... .......... .......... 87% 2.33M 2s
## 26500K .......... .......... .......... .......... .......... 87% 2.10M 2s
## 26550K .......... .......... .......... .......... .......... 87% 2.23M 2s
## 26600K .......... .......... .......... .......... .......... 88% 2.09M 2s
## 26650K .......... .......... .......... .......... .......... 88% 2.11M 2s
## 26700K .......... .......... .......... .......... .......... 88% 1.79M 2s
## 26750K .......... .......... .......... .......... .......... 88% 3.14M 2s
## 26800K .......... .......... .......... .......... .......... 88% 1.59M 2s
## 26850K .......... .......... .......... .......... .......... 88% 2.20M 2s
## 26900K .......... .......... .......... .......... .......... 89% 2.13M 2s
## 26950K .......... .......... .......... .......... .......... 89% 2.20M 2s
## 27000K .......... .......... .......... .......... .......... 89% 2.10M 2s
## 27050K .......... .......... .......... .......... .......... 89% 1.89M 1s
## 27100K .......... .......... .......... .......... .......... 89% 2.73M 1s
## 27150K .......... .......... .......... .......... .......... 89% 1.95M 1s
## 27200K .......... .......... .......... .......... .......... 90% 1.79M 1s
## 27250K .......... .......... .......... .......... .......... 90% 2.23M 1s
## 27300K .......... .......... .......... .......... .......... 90% 2.13M 1s
## 27350K .......... .......... .......... .......... .......... 90% 1.76M 1s
## 27400K .......... .......... .......... .......... .......... 90% 2.74M 1s
## 27450K .......... .......... .......... .......... .......... 90% 2.17M 1s
## 27500K .......... .......... .......... .......... .......... 91% 2.03M 1s
## 27550K .......... .......... .......... .......... .......... 91% 2.33M 1s
## 27600K .......... .......... .......... .......... .......... 91% 1.61M 1s
## 27650K .......... .......... .......... .......... .......... 91% 2.15M 1s
## 27700K .......... .......... .......... .......... .......... 91% 1.94M 1s
## 27750K .......... .......... .......... .......... .......... 91% 2.20M 1s
## 27800K .......... .......... .......... .......... .......... 92% 2.29M 1s
## 27850K .......... .......... .......... .......... .......... 92% 2.45M 1s
## 27900K .......... .......... .......... .......... .......... 92% 2.22M 1s
## 27950K .......... .......... .......... .......... .......... 92% 2.08M 1s
## 28000K .......... .......... .......... .......... .......... 92% 1.42M 1s
## 28050K .......... .......... .......... .......... .......... 92% 3.00M 1s
## 28100K .......... .......... .......... .......... .......... 93% 2.04M 1s
## 28150K .......... .......... .......... .......... .......... 93% 2.06M 1s
## 28200K .......... .......... .......... .......... .......... 93% 2.29M 1s
## 28250K .......... .......... .......... .......... .......... 93% 2.19M 1s
## 28300K .......... .......... .......... .......... .......... 93% 2.30M 1s
## 28350K .......... .......... .......... .......... .......... 93% 2.17M 1s
## 28400K .......... .......... .......... .......... .......... 94% 1.62M 1s
## 28450K .......... .......... .......... .......... .......... 94% 1.83M 1s
## 28500K .......... .......... .......... .......... .......... 94% 2.63M 1s
## 28550K .......... .......... .......... .......... .......... 94% 2.23M 1s
## 28600K .......... .......... .......... .......... .......... 94% 1.76M 1s
## 28650K .......... .......... .......... .......... .......... 94% 2.77M 1s
## 28700K .......... .......... .......... .......... .......... 95% 2.22M 1s
## 28750K .......... .......... .......... .......... .......... 95% 2.08M 1s
## 28800K .......... .......... .......... .......... .......... 95% 1.55M 1s
## 28850K .......... .......... .......... .......... .......... 95% 2.24M 1s
## 28900K .......... .......... .......... .......... .......... 95% 2.21M 1s
## 28950K .......... .......... .......... .......... .......... 95% 2.26M 1s
## 29000K .......... .......... .......... .......... .......... 96% 2.08M 1s
## 29050K .......... .......... .......... .......... .......... 96% 2.28M 1s
## 29100K .......... .......... .......... .......... .......... 96% 2.11M 1s
## 29150K .......... .......... .......... .......... .......... 96% 2.21M 0s
## 29200K .......... .......... .......... .......... .......... 96% 1.63M 0s
## 29250K .......... .......... .......... .......... .......... 96% 2.10M 0s
## 29300K .......... .......... .......... .......... .......... 97% 2.23M 0s
## 29350K .......... .......... .......... .......... .......... 97% 2.19M 0s
## 29400K .......... .......... .......... .......... .......... 97% 2.21M 0s
## 29450K .......... .......... .......... .......... .......... 97% 2.19M 0s
## 29500K .......... .......... .......... .......... .......... 97% 2.14M 0s
## 29550K .......... .......... .......... .......... .......... 97% 1.73M 0s
## 29600K .......... .......... .......... .......... .......... 98% 2.08M 0s
## 29650K .......... .......... .......... .......... .......... 98% 2.20M 0s
## 29700K .......... .......... .......... .......... .......... 98% 2.04M 0s
## 29750K .......... .......... .......... .......... .......... 98% 2.33M 0s
## 29800K .......... .......... .......... .......... .......... 98% 1.89M 0s
## 29850K .......... .......... .......... .......... .......... 98% 2.32M 0s
## 29900K .......... .......... .......... .......... .......... 99% 2.23M 0s
## 29950K .......... .......... .......... .......... .......... 99% 2.26M 0s
## 30000K .......... .......... .......... .......... .......... 99% 1.64M 0s
## 30050K .......... .......... .......... .......... .......... 99% 2.11M 0s
## 30100K .......... .......... .......... .......... .......... 99% 2.27M 0s
## 30150K .......... .......... .......... .......... .......... 99% 2.06M 0s
## 30200K .......... .......... .......... .......... ... 100% 2.83M=14s
##
## 2016-02-12 05:09:04 (2.06 MB/s) - 'gg_13_5_otus.tar.gz' saved [30969823/30969823]
##
## x ./gg_13_5_otus/
## x ./gg_13_5_otus/97_otu_taxonomy.txt
## x ./gg_13_5_otus/97_otus.fasta
Add the taxa outputted to the OTU table with the column header “taxonomy” and output the resulting file to biom format:
awk 'NR==1; NR > 1 {print $0 | "sort"}' dairybreeds.otu_table.txt > dairybreeds.otu_table.sort.txt
sort assigned_gg_taxa/dairybreeds.otus2_tax_assignments.txt > assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.txt
{ printf '\ttaxonomy\t\t\n'; cat assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.txt ; } > assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label.txt
awk '!/2496/{print}' assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label.txt > assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label2.txt
paste dairybreeds.otu_table.sort.txt <(cut -f 2 assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label2.txt) > dairybreeds.otu_table.tax.txt
rm dairybreeds.otu_table.sort.txt
biom convert --table-type "OTU table" -i dairybreeds.otu_table.tax.txt -o dairybreeds.otu_table.tax.biom --process-obs-metadata taxonomy --to-json
Remove sample x106(Jersey cow). Sample was not obtained from this cow.
printf "x106" > remove_sample.txt
filter_samples_from_otu_table.py -i dairybreeds.otu_table.tax.biom -o dairybreeds.otu_table.tax.filtered.biom --sample_id_fp remove_sample.txt --negate_sample_id_fp
Sequences were aligned using the RDP aligner. The aligned_dairybreeds.otus2.fasta_alignment_summary.txt obtained from the alignment is provided. This file was used to assess OTUs that did not aligned properly based on the following criteria: 1. The minimum starting position was 324. As previously described, sequences were trimmed to be 130 basepairs in length and since the RDP aligner sets the end position to be 454, sequences must start at 324 to be 130 basepairs (i.e. 454-324=130). 2. The maximun starting position was 354. The minimum number of basepairs accepted was 100, thus these sequences should start at 354 to contain 100 basepairs (i.e. 454-354=100).
Output files from the RDP aligner are provided.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/aligned_dairybreeds.otus2.fasta
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/aligned_dairybreeds.otus2.fasta_alignment_summary.txt
Make a file containing all the OTUs that aligned properly
alignedOTUs <- read.table("aligned_dairybreeds.otus2.fasta_alignment_summary.txt",
header = T, sep = "\t")
properOTUtable <- subset(alignedOTUs, (Start >= 324 & Start <= 354) & End ==
454, select = SequenceId)
write.table(properOTUtable, file = "proper_aligned_otu.txt", col.names = F,
row.names = F)
Remove those OTUs that did not align well from the OTU table. Then remove OTUs with Cyanobacteria classification. UPARSE should have removed sinlgeton OTUs, but double check with the (-n 2 parameter):
filter_otus_from_otu_table.py -i dairybreeds.otu_table.tax.filtered.biom -o dairybreeds.otu_table.tax.filtered.filtered.biom --negate_ids_to_exclude -e proper_aligned_otu.txt -n 2
filter_taxa_from_otu_table.py -i dairybreeds.otu_table.tax.filtered.filtered.biom -o dairybreeds.otu_table.tax.final.biom -n p__Cyanobacteria
Generate quality filtered OTU table with assigend taxonomy
biom convert -i dairybreeds.otu_table.tax.final.biom -o Table_OTUs.txt --to-tsv --header-key taxonomy
otus_data <- read.table("Table_OTUs.txt", header = F, sep = "\t")
colnames(otus_data) <- c("OTU ID", "Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein3C", "Holstein2", "Holstein4C", "Holstein4", "Holstein5C",
"Jersey1", "Holstein2C", "Jersey2", "Holstein1C", "Taxonomy")
otus_data_arranged <- otus_data[c("OTU ID", "Holstein1", "Holstein2", "Holstein3",
"Holstein4", "Holstein5", "Holstein1C", "Holstein2C", "Holstein3C", "Holstein4C",
"Holstein5C", "Jersey1", "Jersey2", "Jersey3", "Jersey4", "Taxonomy")]
write.table(otus_data_arranged, sep = "\t", file = "TableS3.txt", col.names = T,
row.names = F)
Use the aligned file to generate a phylogenetic tree using clearcut in mothur. Note that using the unfiltered aligned file does not affect downstream results. Clearcut requires ID lengths greater than ~10 characters, thus add 10 ’A’s to the front of all sequence names. Then remove the ’A’s after the tree is formed.
sed -i -e 's/>/>AAAAAAAAAA/g' aligned_dairybreeds.otus2.fasta
mothur "#dist.seqs(fasta=aligned_dairybreeds.otus2.fasta, output=lt)"
mothur "#clearcut(phylip=aligned_dairybreeds.otus2.phylip.dist)"
sed -i -e 's/AAAAAAAAAA//g' aligned_dairybreeds.otus2.phylip.tre
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > dist.seqs(fasta=aligned_dairybreeds.otus2.fasta, output=lt)
## [WARNING]: We found more than 25% of the bases in sequence AAAAAAAAAA#=GC_RF to be ambiguous. Mothur is not setup to process protein sequences.
##
## Using 1 processors.
## 0 0
## 100 0
## 200 0
## 300 0
## 400 0
## 500 0
## 600 0
## 700 0
## 800 0
## 900 0
## 1000 1
## 1100 1
## 1200 1
## 1300 1
## 1400 2
## 1500 2
## 1600 2
## 1700 3
## 1800 3
## 1900 3
## 2000 4
## 2100 4
## 2200 4
## 2300 5
## 2400 5
## 2500 6
## 2600 6
## 2700 7
## 2800 7
## 2900 8
## 3000 9
## 3100 9
## 3200 10
## 3249 10
##
## Output File Names:
## aligned_dairybreeds.otus2.phylip.dist
##
## It took 10 seconds to calculate the distances for 3250 sequences.
##
## mothur > quit()
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > clearcut(phylip=aligned_dairybreeds.otus2.phylip.dist)
##
## Output File Names:
## aligned_dairybreeds.otus2.phylip.tre
##
##
## mothur > quit()
Rarefy quality filtered OTU table to the lowest depth across samples and compute alpha diversity metrics and generate rarefaction plots. Note from QIIME: “If the lines for some categories do not extend all the way to the right end of the x-axis, that means that at least one of the samples in that category does not have that many sequences.”
biom summarize-table -i dairybreeds.otu_table.tax.final.biom -o dairybreeds.otu_table.tax.final.summary.txt
single_rarefaction.py -i dairybreeds.otu_table.tax.final.biom -d 12141 -o dairybreeds.otu_table_rarefied.biom
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/dairybreeds.otu_table_rarefied.biom
## --2016-02-12 05:10:26-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/dairybreeds.otu_table_rarefied.biom
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 906210 (885K) [application/octet-stream]
## Saving to: 'dairybreeds.otu_table_rarefied.biom'
##
## 0K .......... .......... .......... .......... .......... 5% 675K 1s
## 50K .......... .......... .......... .......... .......... 11% 800K 1s
## 100K .......... .......... .......... .......... .......... 16% 768K 1s
## 150K .......... .......... .......... .......... .......... 22% 395K 1s
## 200K .......... .......... .......... .......... .......... 28% 340K 1s
## 250K .......... .......... .......... .......... .......... 33% 700K 1s
## 300K .......... .......... .......... .......... .......... 39% 527K 1s
## 350K .......... .......... .......... .......... .......... 45% 600K 1s
## 400K .......... .......... .......... .......... .......... 50% 528K 1s
## 450K .......... .......... .......... .......... .......... 56% 820K 1s
## 500K .......... .......... .......... .......... .......... 62% 794K 1s
## 550K .......... .......... .......... .......... .......... 67% 783K 0s
## 600K .......... .......... .......... .......... .......... 73% 764K 0s
## 650K .......... .......... .......... .......... .......... 79% 814K 0s
## 700K .......... .......... .......... .......... .......... 84% 819K 0s
## 750K .......... .......... .......... .......... .......... 90% 499K 0s
## 800K .......... .......... .......... .......... .......... 96% 694K 0s
## 850K .......... .......... .......... .... 100% 913K=1.4s
##
## 2016-02-12 05:10:27 (630 KB/s) - 'dairybreeds.otu_table_rarefied.biom' saved [906210/906210]
alpha_diversity.py -i dairybreeds.otu_table_rarefied.biom -m chao1,observed_otus,goods_coverage -o alpha_rarefaction_even.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/qiime_parameters.txt
alpha_rarefaction.py -i dairybreeds.otu_table_rarefied.biom -n 10 --min_rare_depth 1 -m mappingfile.txt -p qiime_parameters.txt -o alpha_rarefaction_plots_even
## --2016-02-12 05:10:29-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/qiime_parameters.txt
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 203 [text/plain]
## Saving to: 'qiime_parameters.txt'
##
## 0K 100% 13.8M=0s
##
## 2016-02-12 05:10:29 (13.8 MB/s) - 'qiime_parameters.txt' saved [203/203]
##
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
## if self._edgecolors == str('face'):
Generate alpha diversity charts
library(XML)
library(matrixStats)
## matrixStats v0.50.1 (2015-12-14) successfully loaded. See ?matrixStats for help.
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:plyr':
##
## count
rare_table <- readHTMLTable("alpha_rarefaction_plots_even/alpha_rarefaction_plots/rarefaction_plots.html")
rare_table$rare_data[rare_table$rare_data == "nan"] <- NA
alpha_rare <- na.omit(rare_table$rare_data)
colnames(alpha_rare)[2] <- "Seqs.Sample"
colnames(alpha_rare)[3] <- "chao1.Ave."
colnames(alpha_rare)[4] <- "chao1.Err."
colnames(alpha_rare)[5] <- "observed_otus.Ave."
colnames(alpha_rare)[6] <- "observed_otus.Err."
cols = c(2, 3, 4, 5, 6)
alpha_rare[, cols] <- lapply(cols, function(x) as.numeric(as.character(alpha_rare[,
x])))
breed_rare <- subset(alpha_rare, Description %in% c("Holstein", "HolsteinC",
"Jersey"))
method_rare <- subset(alpha_rare, Description %in% c("Cannula", "TubingH"))
breed_rare$label <- "Breed"
method_rare$label <- "Sampling Method"
alpha_rare <- rbind(breed_rare, method_rare)
pd <- position_dodge(width = 275)
rare_otu_plot <- ggplot(alpha_rare, aes(x = Seqs.Sample, y = observed_otus.Ave.,
colour = Description, group = Description, ymin = observed_otus.Ave. - observed_otus.Err.,
ymax = observed_otus.Ave. + observed_otus.Err.)) + geom_line(position = pd) +
geom_pointrange(position = pd) + scale_colour_manual(values = c("#FF0000",
"#0000FF", "#FFA500", "#006400", "#FF00FF"), limits = c("Holstein", "HolsteinC",
"Jersey", "Cannula", "TubingH"), labels = c("Holstein", "HolsteinC", "Jersey",
"Cannula", "Tubing")) + labs(x = "Sequences per Sample", y = "Mean Observed OTUs") +
theme(legend.title = element_blank(), axis.text = element_text(color = "black",
size = 8, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(face = "bold"),
strip.text = element_text(size = 12, face = "bold")) + facet_grid(~label)
rare_chao1_plot <- ggplot(alpha_rare, aes(x = Seqs.Sample, y = chao1.Ave., colour = Description,
group = Description, ymin = chao1.Ave. - chao1.Err., ymax = chao1.Ave. +
chao1.Err.)) + geom_line(position = pd) + geom_pointrange(position = pd) +
scale_colour_manual(values = c("#FF0000", "#0000FF", "#FFA500", "#006400",
"#FF00FF"), limits = c("Holstein", "HolsteinC", "Jersey", "Cannula",
"TubingH"), labels = c("Holstein", "HolsteinC", "Jersey", "Cannula",
"Tubing")) + labs(x = "Sequences per Sample", y = "Mean Chao1") + theme(legend.title = element_blank(),
axis.text = element_text(color = "black", size = 8, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(face = "bold"),
strip.text = element_text(size = 12, face = "bold")) + facet_grid(~label)
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
library(grid)
plots <- c(list(...), plotlist)
numPlots = length(plots)
if (is.null(layout)) {
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols,
nrow = ceiling(numPlots/cols))
}
if (numPlots == 1) {
print(plots[[1]])
} else {
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
for (i in 1:numPlots) {
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col))
}
}
}
png("FigS2.png", units = "in", height = 6, width = 12, res = 300)
multiplot(rare_chao1_plot, rare_otu_plot, cols = 2)
dev.off()
## quartz_off_screen
## 2
pdf("FigS2.pdf", height = 6, width = 12)
multiplot(rare_chao1_plot, rare_otu_plot, cols = 2)
dev.off()
## quartz_off_screen
## 2
Perform a two-sided t-test to evaluate alpha diversity metrics
alpha <- read.table("alpha_rarefaction_even.txt", header = T, sep = "\t")
alpha_sub <- alpha[, c(2, 3)]
breed_data <- c("Holstein", "Holstein", "Jersey", "Holstein", "Jersey", "Holstein",
"HolsteinC", "HolsteinC", "Holstein", "HolsteinC", "Jersey", "HolsteinC",
"Jersey", "HolsteinC")
alpha_metrics <- data.frame(alpha_sub, breed_data)
breed_final <- subset(alpha_metrics, breed_data != "HolsteinC")
method_final <- subset(alpha_metrics, breed_data != "Jersey")
# Levene test Ho: Variances of breeds are equal
leveneTest(breed_final$chao1 ~ breed_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.002 0.9652
## 7
leveneTest(breed_final$observed_otus ~ breed_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0058 0.9416
## 7
# T test Ho: alpha metrics Holstein = alpha metrics Jersey Two-sided t test
# default options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(breed_final$chao1 ~ breed_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: breed_final$chao1 by breed_final$breed_data
## t = 3.1496, df = 7, p-value = 0.01616
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 73.34562 515.21409
## sample estimates:
## mean in group Holstein mean in group Jersey
## 1846.347 1552.067
t.test(breed_final$observed_otus ~ breed_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: breed_final$observed_otus by breed_final$breed_data
## t = 2.9867, df = 7, p-value = 0.02032
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 35.64569 306.65431
## sample estimates:
## mean in group Holstein mean in group Jersey
## 1343.40 1172.25
# Levene test Ho: Variances of methods are equal
leveneTest(method_final$chao1 ~ method_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.1348 0.723
## 8
leveneTest(method_final$observed_otus ~ method_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.2746 0.6145
## 8
# T test Ho: alpha metrics Tubing = alpha metrics Cannula Two-sided t test
# default options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(method_final$chao1 ~ method_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: method_final$chao1 by method_final$breed_data
## t = 0.35397, df = 8, p-value = 0.7325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -155.6428 212.0891
## sample estimates:
## mean in group Holstein mean in group HolsteinC
## 1846.347 1818.123
t.test(method_final$observed_otus ~ method_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: method_final$observed_otus by method_final$breed_data
## t = 0.38666, df = 8, p-value = 0.7091
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -92.32815 129.52815
## sample estimates:
## mean in group Holstein mean in group HolsteinC
## 1343.4 1324.8
Assign taxonomy and generate plots for desired taxa.
summarize_taxa.py -i dairybreeds.otu_table_rarefied.biom -o summarize_taxa -L 2,3,4,5,6,7
Generate a stacked bar chart for the phylum taxonomic rank across samples.
taxa_data <- read.table("summarize_taxa/dairybreeds.otu_table_rarefied_L2.txt",
header = F, sep = "\t")
colnames(taxa_data) <- c("Phylum", "Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein2", "Holstein3C", "Holstein4C", "Holstein4", "Holstein5C",
"Jersey1", "Holstein2C", "Jersey2", "Holstein1C")
taxa_data2 <- taxa_data[c("Phylum", "Holstein1", "Holstein1C", "Holstein2",
"Holstein2C", "Holstein3", "Holstein3C", "Holstein4", "Holstein4C", "Holstein5",
"Holstein5C", "Jersey1", "Jersey2", "Jersey3", "Jersey4")]
taxa_data2$Phylum <- sub("k__", "", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("Bacteria;", "", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("p__", "", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("Unassigned;Other", "Unassigned", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("Other", "No Assigned Phylum", taxa_data2$Phylum)
plot_taxa_long <- gather(taxa_data2, Samples, Proportion, Holstein1:Jersey4)
png("FigS3.png", units = "in", height = 12, width = 17, res = 300)
ggplot(plot_taxa_long, aes(x = Samples, y = Proportion, fill = Phylum)) + geom_bar(stat = "identity") +
scale_fill_manual(values = c("#FF0000", "#0000FF", "#FFA500", "#556B2F",
"#800080", "#FFFF00", "#00FFFF", "#FFC0CB", "#5F9EA0", "#A52A2A", "#808080",
"#00FF00", "#000080", "#ADD8E6", "#E9967A", "#00FA9A", "#DA70D6", "#FFD700")) +
theme(axis.text = element_text(color = "black", size = 11, face = "bold"),
axis.title = element_text(color = "black", size = 14, face = "bold"),
legend.text = element_text(size = 12, face = "bold"), legend.title = element_text(size = 12))
dev.off()
## quartz_off_screen
## 2
pdf("FigS3.pdf", height = 12, width = 17)
ggplot(plot_taxa_long, aes(x = Samples, y = Proportion, fill = Phylum)) + geom_bar(stat = "identity") +
scale_fill_manual(values = c("#FF0000", "#0000FF", "#FFA500", "#556B2F",
"#800080", "#FFFF00", "#00FFFF", "#FFC0CB", "#5F9EA0", "#A52A2A", "#808080",
"#00FF00", "#000080", "#ADD8E6", "#E9967A", "#00FA9A", "#DA70D6", "#FFD700")) +
theme(axis.text = element_text(color = "black", size = 11, face = "bold"),
axis.title = element_text(color = "black", size = 14, face = "bold"),
legend.text = element_text(size = 12, face = "bold"), legend.title = element_text(size = 12))
dev.off()
## quartz_off_screen
## 2
Generate core file of OTUS. To include an OTU as a member of the core, a cut off of 80% (4/5 cows) was assigned for Holstein cows and 75% (3/4 cows) for Jersey cows.
mkdir cores
filter_otus_from_otu_table.py -i split_by_breed/dairybreeds.otu_table_rarefied__Breed_Holstein__.biom -o cores/core_Holstein.biom -s 4
filter_otus_from_otu_table.py -i split_by_breed/dairybreeds.otu_table_rarefied__Breed_HolsteinC__.biom -o cores/core_HolsteinC.biom -s 4
filter_otus_from_otu_table.py -i split_by_breed/dairybreeds.otu_table_rarefied__Breed_Jersey__.biom -o cores/core_Jersey.biom -s 3
merge_otu_tables.py -i cores/core_Holstein.biom,cores/core_HolsteinC.biom,cores/core_Jersey.biom -o cores/merged_cores.biom
biom convert -i cores/merged_cores.biom -o cores/merged_cores.txt --to-tsv
When cores files are merged, values are set to 0 for an OTU that is not a core in any other of the merged files. However, this does not mean that the OTU was not present but that it did not meet the cutoff criteria in a particular core file. Reads across OTUs have to be adjusted in the merged core file. From the adjusted core file, filter the OTU table into by breed and sampling method to generate respective beta diversity outputs. Additionally, from a file with all the data generate beta diversity outputs.
cores <- read.table("cores/merged_cores.txt", sep = "\t", header = F)
cores_sub <- cores[, 1]
write.table(cores_sub, file = "cores/core_otus.txt", col.names = F, row.names = F)
filter_otus_from_otu_table.py -i dairybreeds.otu_table_rarefied.biom -o final_core.biom --negate_ids_to_exclude -e cores/core_otus.txt
filter_samples_from_otu_table.py -i final_core.biom -o filter_breed.biom -m mappingfile.txt -s 'Breed:*,!HolsteinC'
filter_samples_from_otu_table.py -i final_core.biom -o filter_method.biom -m mappingfile.txt -s 'Breed:*,!Jersey'
beta_diversity_through_plots.py -i final_core.biom -o beta_div_all -t aligned_dairybreeds.otus2.phylip.tre -m mappingfile.txt -p qiime_parameters.txt
beta_diversity_through_plots.py -i filter_breed.biom -o beta_div_breed -t aligned_dairybreeds.otus2.phylip.tre -m mappingfile.txt -p qiime_parameters.txt
beta_diversity_through_plots.py -i filter_method.biom -o beta_div_method -m mappingfile.txt -t aligned_dairybreeds.otus2.phylip.tre -p qiime_parameters.txt
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2645: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
## VisibleDeprecationWarning)
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2645: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
## VisibleDeprecationWarning)
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2645: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
## VisibleDeprecationWarning)
Generate the principal coordinate analyses plots
# breed and sampling method
all_data <- read.table("beta_div_all/weighted_unifrac_pc.txt", skip = 9, nrows = 14,
sep = "\t")
all_pc <- all_data[, c("V2", "V3")]
samples_all <- c("H5", "H3", "J3", "H1", "J4", "H2", "H3C", "H4C", "H4", "H5C",
"J1", "H2C", "J2", "H1C")
breed_all <- c("Holstein", "Holstein", "Jersey", "Holstein", "Jersey", "Holstein",
"HolsteinC", "HolsteinC", "Holstein", "HolsteinC", "Jersey", "HolsteinC",
"Jersey", "HolsteinC")
pc_all <- data.frame(samples_all, all_pc, breed_all)
colnames(pc_all) <- c("SampleID", "PC1", "PC2", "Breed")
all_pc_plot <- ggplot(pc_all, aes(x = PC1, y = PC2, shape = Breed)) + geom_point(size = 3) +
labs(x = "PC1 (31.8%)", y = "PC2 (16.5%)") + ggtitle("Breed and Sampling Method") +
theme(title = element_text(color = "black", size = 14, face = "bold"), axis.text = element_text(color = "black",
size = 14, face = "bold"), axis.title = element_text(color = "black",
size = 14, face = "bold"), legend.text = element_text(size = 14, face = "bold"),
legend.title = element_blank()) + geom_text(aes(label = SampleID), size = 4,
vjust = -0.5, hjust = -0.3)
# breed
breed_data <- read.table("beta_div_breed/weighted_unifrac_pc.txt", skip = 9,
nrows = 9, sep = "\t")
breed_pc <- breed_data[, c("V2", "V3")]
samples <- c("H5", "H3", "J3", "H1", "J4", "H2", "H4", "J1", "J2")
breed <- c("Holstein", "Holstein", "Jersey", "Holstein", "Jersey", "Holstein",
"Holstein", "Jersey", "Jersey")
pc_breed <- data.frame(samples, breed_pc, breed)
colnames(pc_breed) <- c("SampleID", "PC1", "PC2", "Breed")
breed_pc_plot <- ggplot(pc_breed, aes(x = PC1, y = PC2, shape = Breed)) + geom_point(size = 3) +
labs(x = "PC1 (36.9%)", y = "PC2 (20.3%)") + ggtitle("Breed") + theme(title = element_text(color = "black",
size = 14, face = "bold"), axis.text = element_text(color = "black", size = 14,
face = "bold"), axis.title = element_text(color = "black", size = 14, face = "bold"),
legend.text = element_text(size = 14, face = "bold"), legend.title = element_blank()) +
geom_text(aes(label = SampleID), size = 4, vjust = -0.5, hjust = -0.3)
# sampling method
method_data <- read.table("beta_div_method/weighted_unifrac_pc.txt", skip = 9,
nrows = 10, sep = "\t")
method_pc <- method_data[, c("V2", "V3")]
samples_method <- c("H5", "H3", "H1", "H2", "H3C", "H4C", "H4", "H5C", "H2C",
"H1C")
method <- c("Tubing", "Tubing", "Tubing", "Tubing", "Cannula", "Cannula", "Tubing",
"Cannula", "Cannula", "Cannula")
pc_method <- data.frame(samples_method, method_pc, method)
colnames(pc_method) <- c("SampleID", "PC1", "PC2", "Method")
method_pc_plot <- ggplot(pc_method, aes(x = PC1, y = PC2, shape = Method)) +
geom_point(size = 3) + labs(x = "PC1 (32.4%)", y = "PC2 (24.3%)") + ggtitle("Sampling Method") +
theme(title = element_text(color = "black", size = 14, face = "bold"), axis.text = element_text(color = "black",
size = 14, face = "bold"), axis.title = element_text(color = "black",
size = 14, face = "bold"), legend.text = element_text(size = 14, face = "bold"),
legend.title = element_blank()) + geom_text(aes(label = SampleID), size = 4,
vjust = -0.5, hjust = -0.3)
png("Fig2.png", height = 8, width = 26, units = "in", res = 300)
multiplot(all_pc_plot, breed_pc_plot, method_pc_plot, cols = 3)
dev.off()
## quartz_off_screen
## 2
pdf("Fig2.pdf", height = 8, width = 26)
multiplot(all_pc_plot, breed_pc_plot, method_pc_plot, cols = 3)
dev.off()
## quartz_off_screen
## 2
Evaluate the effect of breed and method on microbial community composition using the permutational multivariate analysis of variance (permanova) test (implemented in R as adonis function in the vegan package) using the unweighted unifrac distance matrix
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.3-3
# Mapping files
mappingfile <- read.table("mappingfile.txt", sep = "\t", header = F)
colnames(mappingfile) <- c("SampleID", "BarcodeSequence", "LinkerPrimerSequence",
"ReversePrimer", "Method", "Breed", "BCcount", "Well", "Description")
mappingcorrect <- mappingfile[-3, ]
breed_map <- subset(mappingcorrect, Breed != "HolsteinC")
method_map <- subset(mappingfile, Breed != "Jersey")
breed_weighted <- read.table("beta_div_breed/weighted_unifrac_dm.txt", sep = "\t",
header = TRUE)
# Order breed mapping file Sample IDs like matirx Sample IDs
breed_map = breed_map[match(breed_weighted$X, breed_map$SampleID), ]
row.names(breed_weighted) <- breed_weighted$X
breed_weighted <- breed_weighted[, -1]
breed_weighted <- as.dist(breed_weighted)
method_weighted <- read.table("beta_div_method/weighted_unifrac_dm.txt", sep = "\t",
header = TRUE)
# Order method mapping file Sample IDs like matirx Sample IDs
method_map = method_map[match(method_weighted$X, method_map$SampleID), ]
row.names(method_weighted) <- method_weighted$X
method_weighted <- method_weighted[, -1]
method_weighted <- as.dist(method_weighted)
all_weighted <- read.table("beta_div_all/weighted_unifrac_dm.txt", sep = "\t",
header = TRUE)
# Order method mapping file Sample IDs like matirx Sample IDs
mappingcorrect = mappingcorrect[match(all_weighted$X, mappingcorrect$SampleID),
]
row.names(all_weighted) <- all_weighted$X
all_weighted <- all_weighted[, -1]
all_weighted <- as.dist(all_weighted)
adonis(breed_weighted ~ Breed, permutations = 999, data = breed_map)
##
## Call:
## adonis(formula = breed_weighted ~ Breed, data = breed_map, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Breed 1 0.22239 0.222393 2.4705 0.26086 0.009 **
## Residuals 7 0.63015 0.090022 0.73914
## Total 8 0.85254 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(method_weighted ~ Method, permutations = 999, data = method_map)
##
## Call:
## adonis(formula = method_weighted ~ Method, data = method_map, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Method 1 0.05418 0.054183 0.80592 0.09152 0.631
## Residuals 8 0.53785 0.067231 0.90848
## Total 9 0.59203 1.00000
adonis(all_weighted ~ Breed, permutations = 999, data = mappingcorrect)
##
## Call:
## adonis(formula = all_weighted ~ Breed, data = mappingcorrect, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Breed 2 0.30681 0.153405 1.9643 0.26316 0.008 **
## Residuals 11 0.85905 0.078095 0.73684
## Total 13 1.16586 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
HOMOVA
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.dist.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.design.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.dist.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.design.txt
## --2016-02-12 05:12:32-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.dist.txt
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 923 [text/plain]
## Saving to: 'breed.dist.txt'
##
## 0K 100% 29.3M=0s
##
## 2016-02-12 05:12:32 (29.3 MB/s) - 'breed.dist.txt' saved [923/923]
##
## --2016-02-12 05:12:32-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.design.txt
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 107 [text/plain]
## Saving to: 'breed.design.txt'
##
## 0K 100% 8.50M=0s
##
## 2016-02-12 05:12:32 (8.50 MB/s) - 'breed.design.txt' saved [107/107]
##
## --2016-02-12 05:12:32-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.dist.txt
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1149 (1.1K) [text/plain]
## Saving to: 'method.dist.txt'
##
## 0K . 100% 64.5M=0s
##
## 2016-02-12 05:12:33 (64.5 MB/s) - 'method.dist.txt' saved [1149/1149]
##
## --2016-02-12 05:12:33-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.design.txt
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 119 [text/plain]
## Saving to: 'method.design.txt'
##
## 0K 100% 4.73M=0s
##
## 2016-02-12 05:12:33 (4.73 MB/s) - 'method.design.txt' saved [119/119]
mothur "#homova(phylip=breed.dist.txt, design=breed.design.txt)"
mothur "#homova(phylip=method.dist.txt, design=method.design.txt)"
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > homova(phylip=breed.dist.txt, design=breed.design.txt)
## HOMOVA BValue P-value SSwithin/(Ni-1)_values
## Holstein-Jersey 0.0805971 0.254 0.0772381 0.107066
## Experiment-wise error rate: 0.05
## If you have borderline P-values, you should try increasing the number of iterations
##
## Output File Names:
## breed.dist.homova
##
##
## mothur > quit()
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > homova(phylip=method.dist.txt, design=method.design.txt)
## HOMOVA BValue P-value SSwithin/(Ni-1)_values
## cannula-tubing 0.0796527 0.285 0.0572247 0.0772381
## Experiment-wise error rate: 0.05
## If you have borderline P-values, you should try increasing the number of iterations
##
## Output File Names:
## method.dist.homova
##
##
## mothur > quit()
Make heat map of all core OTUs
mkdir heatmap
biom convert -i final_core.biom -o heatmap/final_core_json.biom --to-json --table-type="OTU table"
library(Heatplus)
library(RColorBrewer)
otu_core <- read_biom("heatmap/final_core_json.biom")
core_table <- as.data.frame(as(biom_data(otu_core), "matrix"))
colnames(core_table) <- c("Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein2", "Holstein3C", "Holstein4C", "Holstein4", "Holstein5C",
"Jersey1", "Holstein2C", "Jersey2", "Holstein1C")
core_table_trans <- as.data.frame(t(core_table))
data_propor <- core_table_trans/rowSums(core_table_trans)
scalewhiteblack <- colorRampPalette(c("white", "black"), space = "rgb")(100)
maxab <- apply(data_propor, 2, max)
n1 <- names(which(maxab < 0.01))
data_abun <- data_propor[, -which(names(data_propor) %in% n1)]
data.dist <- vegdist(data_propor, method = "bray")
row.clus <- hclust(data.dist, "aver")
data.dist.g <- vegdist(t(data_abun), method = "bray")
col.clus <- hclust(data.dist.g, "aver")
png("FigS5.png", height = 8, width = 8, units = "in", res = 300)
heatmap.2(as.matrix(data_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
pdf("FigS5.pdf", height = 8, width = 8)
heatmap.2(as.matrix(data_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
LEfSe was used to determine different OTUs between breeds and sampling methods
biom convert -i filter_breed.biom -o filter_breed_json.biom --to-json --table-type="OTU table"
biom convert -i filter_method.biom -o filter_method_json.biom --to-json --table-type="OTU table"
Create files with LEfSe format
breed_biom <- read_biom("filter_breed_json.biom")
breed_table <- as.data.frame(as(biom_data(breed_biom), "matrix"))
breed_rel <- sweep(breed_table, 2, colSums(breed_table), FUN = "/")
rownames(breed_rel) <- paste("OTU", rownames(breed_rel), sep = "")
breed_rel2 <- breed_rel[0, ]
breed_rel2[nrow(breed_rel2) + 1, ] <- c("Holstein", "Holstein", "Jersey", "Holstein",
"Jersey", "Holstein", "Holstein", "Jersey", "Jersey")
breed_rel2[nrow(breed_rel2) + 1, ] <- c("Holstein5", "Holstein3", "Jersey3",
"Holstein1", "Jersey4", "Holstein2", "Holstein4", "Jersey1", "Jersey2")
row.names(breed_rel2) <- c("breed", "ID")
breed_abundance <- rbind(breed_rel2, breed_rel)
write.table(breed_abundance, sep = "\t", file = "breed_abundance.txt", row.names = TRUE,
col.names = FALSE, quote = FALSE)
method_biom <- read_biom("filter_method_json.biom")
method_table <- as.data.frame(as(biom_data(method_biom), "matrix"))
method_rel <- sweep(method_table, 2, colSums(method_table), FUN = "/")
rownames(method_rel) <- paste("OTU", rownames(method_rel), sep = "")
method_rel2 <- method_rel[0, ]
method_rel2[nrow(method_rel2) + 1, ] <- c("Tubing", "Tubing", "Tubing", "Tubing",
"Cannula", "Cannula", "Tubing", "Cannula", "Cannula", "Cannula")
method_rel2[nrow(method_rel2) + 1, ] <- c("Holstein5", "Holstein3", "Holstein1",
"Holstein2", "Holstein3C", "Holstein4C", "Holstein4", "Holstein5C", "Holstein2C",
"Holstein1C")
row.names(method_rel2) <- c("method", "ID")
method_abundance <- rbind(method_rel2, method_rel)
write.table(method_abundance, sep = "\t", file = "method_abundance.txt", row.names = TRUE,
col.names = FALSE, quote = FALSE)
LEfSe was used to determine OTUs that differ
wget https://bitbucket.org/nsegata/lefse/get/default.zip -O lefse.zip
unzip lefse.zip
mv nsegata* lefse
python lefse/format_input.py breed_abundance.txt breed_lefse_format.txt -c 1 -u 2 -o 1000000
python lefse/run_lefse.py breed_lefse_format.txt breed_lefse_result.txt
python lefse/format_input.py method_abundance.txt method_lefse_format.txt -c 1 -u 2 -o 1000000
python lefse/run_lefse.py method_lefse_format.txt method_lefse_result.txt
## --2016-02-12 05:12:38-- https://bitbucket.org/nsegata/lefse/get/default.zip
## Resolving bitbucket.org... 104.192.143.2, 104.192.143.3, 104.192.143.1
## Connecting to bitbucket.org|104.192.143.2|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 60035 (59K) [application/zip]
## Saving to: 'lefse.zip'
##
## 0K .......... .......... .......... .......... .......... 85% 347K 0s
## 50K ........ 100% 2.18M=0.1s
##
## 2016-02-12 05:12:39 (397 KB/s) - 'lefse.zip' saved [60035/60035]
##
## Archive: lefse.zip
## inflating: nsegata-lefse-e3cabe93a0d1/.hg_archival.txt
## inflating: nsegata-lefse-e3cabe93a0d1/.hgtags
## inflating: nsegata-lefse-e3cabe93a0d1/__init__.py
## inflating: nsegata-lefse-e3cabe93a0d1/example/run.sh
## inflating: nsegata-lefse-e3cabe93a0d1/format_input.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefse.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefse2circlader.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/AbundanceTable.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/CClade.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/ConstantsBreadCrumbs.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/ValidateData.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/__init__.py
## inflating: nsegata-lefse-e3cabe93a0d1/plot_cladogram.py
## inflating: nsegata-lefse-e3cabe93a0d1/plot_features.py
## inflating: nsegata-lefse-e3cabe93a0d1/plot_res.py
## inflating: nsegata-lefse-e3cabe93a0d1/qiime2lefse.py
## inflating: nsegata-lefse-e3cabe93a0d1/requirements.txt
## inflating: nsegata-lefse-e3cabe93a0d1/run_lefse.py
## Number of significantly discriminative features: 212 ( 212 ) before internal wilcoxon
## Number of discriminative features with abs LDA score > 2.0 : 181
## Number of significantly discriminative features: 62 ( 62 ) before internal wilcoxon
## Number of discriminative features with abs LDA score > 2.0 : 48
breed_lefse <- read.table("breed_lefse_result.txt", header = FALSE, sep = "\t")
breed_lefse <- breed_lefse[complete.cases(breed_lefse), ]
breed_lefse$V1 <- gsub("OTU", "", breed_lefse$V1)
write.table(breed_lefse$V1, file = "breed_lefse_otus.txt", row.names = FALSE,
col.names = FALSE, quote = FALSE)
method_lefse <- read.table("method_lefse_result.txt", header = FALSE, sep = "\t")
method_lefse <- method_lefse[complete.cases(method_lefse), ]
method_lefse$V1 <- gsub("OTU", "", method_lefse$V1)
write.table(method_lefse$V1, file = "method_lefse_otus.txt", row.names = FALSE,
col.names = FALSE, quote = FALSE)
Create input files to generate heat maps
biom convert -i filter_breed.biom -o heatmap/filter_breed.txt --to-tsv --header-key taxonomy
biom convert -i filter_method.biom -o heatmap/filter_method.txt --to-tsv --header-key taxonomy
awk '{gsub("; ","\t",$0); print;}' heatmap/filter_breed.txt | awk '{gsub("#OTU","OTU",$0); print;}' | cut -f-10,15 | tail -n +2 | awk '{if(NR==1){print $0,"\ttaxonomy"}else{print }}' > heatmap/breed_tax.txt
awk '{gsub("; ","\t",$0); print;}' heatmap/filter_method.txt | awk '{gsub("#OTU","OTU",$0); print;}' | cut -f-11,16 | tail -n +2 | awk '{if(NR==1){print $0,"\ttaxonomy"}else{print }}' > heatmap/method_tax.txt
# breed
breed_otu <- read.table("heatmap/breed_tax.txt", header = T, sep = "\t", fill = TRUE)
breed_otu$taxonomy <- sub("f__", "", breed_otu$taxonomy)
breed_otu$taxonomy <- sub("\\]", "", breed_otu$taxonomy)
breed_otu$taxonomy <- sub("\\[", "", breed_otu$taxonomy)
breed_otu$taxonomy <- sub("^$", "No Assigned Family", breed_otu$taxonomy)
colnames(breed_otu) <- c("OTUs", "Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein2", "Holstein4", "Jersey1", "Jersey2", "taxonomy")
OTU.ID <- breed_otu[, 1]
taxonomy <- breed_otu[, 11]
breed_samples <- breed_otu[, -11]
row.names(breed_samples) <- breed_samples$OTUs
breed_samples <- breed_samples[, -1]
breed_trans <- as.data.frame(t(breed_samples))
breed_propor <- breed_trans/rowSums(breed_trans)
breed_propor_trans <- as.data.frame(t(breed_propor))
breed_proportion <- data.frame(OTU.ID, breed_propor_trans, taxonomy)
write.table(breed_proportion, file = "heatmap/breed_propor.txt", sep = "\t",
row.names = F, col.names = T, quote = F)
# method
method_otu <- read.table("heatmap/method_tax.txt", header = T, sep = "\t", fill = TRUE)
method_otu$taxonomy <- sub("f__", "", method_otu$taxonomy)
method_otu$taxonomy <- sub("\\]", "", method_otu$taxonomy)
method_otu$taxonomy <- sub("\\[", "", method_otu$taxonomy)
method_otu$taxonomy <- sub("^$", "No Assigned Family", method_otu$taxonomy)
colnames(method_otu) <- c("OTUs", "Holstein5", "Holstein3", "Holstein1", "Holstein2",
"Holstein3C", "Holstein4C", "Holstein4", "Holstein5C", "Holstein2C", "Holstein1C",
"taxonomy")
OTU.ID <- method_otu[, 1]
taxonomy <- method_otu[, 12]
method_samples <- method_otu[, -12]
row.names(method_samples) <- method_samples$OTUs
method_samples <- method_samples[, -1]
method_trans <- as.data.frame(t(method_samples))
method_propor <- method_trans/rowSums(method_trans)
method_propor_trans <- as.data.frame(t(method_propor))
method_proportion <- data.frame(OTU.ID, method_propor_trans, taxonomy)
write.table(method_proportion, file = "heatmap/method_propor.txt", sep = "\t",
row.names = F, col.names = T, quote = F)
awk '{gsub("OTU.ID","#OTU ID",$0); print;}' heatmap/breed_propor.txt> heatmap/breed_propor_final.txt
awk '{gsub("OTU.ID","#OTU ID",$0); print;}' heatmap/method_propor.txt> heatmap/method_propor_final.txt
biom convert -i heatmap/breed_propor_final.txt -o heatmap/breed_propor_final.biom --to-json --table-type="OTU table" --process-obs-metadata taxonomy
biom convert -i heatmap/method_propor_final.txt -o heatmap/method_propor_final.biom --to-json --table-type="OTU table" --process-obs-metadata taxonomy
filter_otus_from_otu_table.py -i heatmap/breed_propor_final.biom -o heatmap/breed_lefse.biom -e breed_lefse_otus.txt --negate_ids_to_exclude
filter_otus_from_otu_table.py -i heatmap/method_propor_final.biom -o heatmap/method_lefse.biom -e method_lefse_otus.txt --negate_ids_to_exclude
biom convert -i heatmap/breed_lefse.biom -o heatmap/breed_lefse.txt --to-tsv --header-key taxonomy
biom convert -i heatmap/method_lefse.biom -o heatmap/method_lefse.txt --to-tsv --header-key taxonomy
awk '{gsub("#OTU ID","OTUs",$0); print;}' heatmap/breed_lefse.txt> heatmap/breed_lefse_final.txt
awk '{gsub("#OTU ID","OTUs",$0); print;}' heatmap/method_lefse.txt> heatmap/method_lefse_final.txt
Make heat maps
# breed all
breed_lefse_all <- read.table("heatmap/breed_lefse_final.txt", header = T, sep = "\t")
breed_lefse_data <- breed_lefse_all[, -11]
row.names(breed_lefse_data) <- breed_lefse_data$OTUs
breed_lefse_data <- breed_lefse_data[, -1]
breed_all_trans <- as.data.frame(t(breed_lefse_data))
scalewhiteblack <- colorRampPalette(c("white", "black"), space = "rgb")(100)
data.dist <- vegdist(breed_all_trans, method = "bray")
row.clus <- hclust(data.dist, "aver")
data.dist.g <- vegdist(t(breed_all_trans), method = "bray")
col.clus <- hclust(data.dist.g, "aver")
png("Fig3a.png", height = 8, width = 8, units = "in", res = 300)
heatmap.2(as.matrix(breed_all_trans), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
pdf("Fig3a.pdf", height = 8, width = 8)
heatmap.2(as.matrix(breed_all_trans), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
# breed most abundant
breed_lefse <- read.table("heatmap/breed_lefse_final.txt", header = T, sep = "\t")
row.names(breed_lefse) <- breed_lefse$OTUs
breed_lefse <- breed_lefse[, -1]
breed_lefse_tax <- subset(breed_lefse, select = c(taxonomy))
breed_lefse_samples <- breed_lefse[, -10]
breed_lefse_trans <- as.data.frame(t(breed_lefse_samples))
scalewhiteblack <- colorRampPalette(c("white", "black"), space = "rgb")(100)
maxab <- apply(breed_lefse_trans, 2, max)
n1 <- names(which(maxab < 0.01))
breed_abun <- breed_lefse_trans[, -which(names(breed_lefse_trans) %in% n1)]
data.dist <- vegdist(breed_lefse_trans, method = "bray")
row.clus <- hclust(data.dist, "aver")
breeds_abun_dist <- vegdist(t(breed_abun), method = "bray")
col.clus <- hclust(breeds_abun_dist, "aver")
breed_abun_trans <- as.data.frame(t(breed_abun))
merge_data <- merge(breed_abun_trans, breed_lefse_tax, by = "row.names")
row.names(merge_data) <- merge_data$Row.names
merge_data <- merge_data[, -1]
breed_abun_fam <- subset(merge_data, select = c(taxonomy))
png("Fig3b.png", height = 8, width = 8, units = "in", res = 300)
heatmap.2(as.matrix(breed_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(10, 7), trace = "none", density.info = "none",
labCol = breed_abun_fam$taxonomy, xlab = "Family", ylab = "Samples", lhei = c(2,
8))
dev.off()
## quartz_off_screen
## 2
pdf("Fig3b.pdf", height = 8, width = 8)
heatmap.2(as.matrix(breed_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(10, 7), trace = "none", density.info = "none",
labCol = breed_abun_fam$taxonomy, xlab = "Family", ylab = "Samples", lhei = c(2,
8))
dev.off()
## quartz_off_screen
## 2